mirror of https://gitee.com/openkylin/linux.git
Merge branch 'drm-next-5.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 5.3: - Add new thermal sensors for vega asics - Various RAS fixes - Add sysfs interface for memory interface utilization - Use HMM rather than mmu notifier for user pages - Expose xgmi topology via kfd - SR-IOV fixes - Fixes for manual driver reload - Add unique identifier for vega asics - Clean up user fence handling with UVD/VCE/VCN blocks - Convert DC to use core bpc attribute rather than a custom one - Add GWS support for KFD - Vega powerplay improvements - Add CRC support for DCE 12 - SR-IOV support for new security policy - Various cleanups From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190529220944.14464-1-alexander.deucher@amd.com
This commit is contained in:
commit
91c1ead6ae
|
@ -70,6 +70,26 @@ Interrupt Handling
|
|||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
|
||||
:internal:
|
||||
|
||||
AMDGPU XGMI Support
|
||||
===================
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
|
||||
:doc: AMDGPU XGMI Support
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
|
||||
:internal:
|
||||
|
||||
AMDGPU RAS debugfs control interface
|
||||
====================================
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
||||
:doc: AMDGPU RAS debugfs control interface
|
||||
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
|
||||
:internal:
|
||||
|
||||
|
||||
GPU Power/Thermal Controls and Monitoring
|
||||
=========================================
|
||||
|
||||
|
|
|
@ -27,10 +27,11 @@ config DRM_AMDGPU_CIK
|
|||
config DRM_AMDGPU_USERPTR
|
||||
bool "Always enable userptr write support"
|
||||
depends on DRM_AMDGPU
|
||||
select MMU_NOTIFIER
|
||||
depends on ARCH_HAS_HMM
|
||||
select HMM_MIRROR
|
||||
help
|
||||
This option selects CONFIG_MMU_NOTIFIER if it isn't already
|
||||
selected to enable full userptr support.
|
||||
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
|
||||
isn't already selected to enable full userptr support.
|
||||
|
||||
config DRM_AMDGPU_GART_DEBUGFS
|
||||
bool "Allow GART access through debugfs"
|
||||
|
|
|
@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
|
|||
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
|
||||
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
|
||||
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
|
||||
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
|
||||
amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
|
||||
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
|
||||
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
|
||||
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
|
||||
|
@ -173,7 +173,7 @@ endif
|
|||
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
|
||||
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
|
||||
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
|
||||
amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
|
||||
amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
|
||||
|
||||
include $(FULL_AMD_PATH)/powerplay/Makefile
|
||||
|
||||
|
|
|
@ -118,7 +118,6 @@ extern int amdgpu_disp_priority;
|
|||
extern int amdgpu_hw_i2c;
|
||||
extern int amdgpu_pcie_gen2;
|
||||
extern int amdgpu_msi;
|
||||
extern int amdgpu_lockup_timeout;
|
||||
extern int amdgpu_dpm;
|
||||
extern int amdgpu_fw_load_type;
|
||||
extern int amdgpu_aspm;
|
||||
|
@ -211,6 +210,7 @@ struct amdgpu_irq_src;
|
|||
struct amdgpu_fpriv;
|
||||
struct amdgpu_bo_va_mapping;
|
||||
struct amdgpu_atif;
|
||||
struct kfd_vm_fault_info;
|
||||
|
||||
enum amdgpu_cp_irq {
|
||||
AMDGPU_CP_IRQ_GFX_EOP = 0,
|
||||
|
@ -415,6 +415,7 @@ struct amdgpu_fpriv {
|
|||
};
|
||||
|
||||
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
|
||||
int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
unsigned size, struct amdgpu_ib *ib);
|
||||
|
@ -558,6 +559,8 @@ struct amdgpu_asic_funcs {
|
|||
uint64_t *count1);
|
||||
/* do we need to reset the asic at init time (e.g., kexec) */
|
||||
bool (*need_reset_on_init)(struct amdgpu_device *adev);
|
||||
/* PCIe replay counter */
|
||||
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -639,6 +642,11 @@ struct nbio_hdp_flush_reg {
|
|||
u32 ref_and_mask_sdma1;
|
||||
};
|
||||
|
||||
/* MMIO remap description; embedded in struct amdgpu_device as rmmio_remap. */
struct amdgpu_mmio_remap {
|
||||
u32 reg_offset; /* NOTE(review): presumably the register offset of the remapped range - confirm against the code that fills it in */
|
||||
resource_size_t bus_addr; /* handed to KFD by amdgpu_amdkfd_get_mmio_remap_phys_addr() */
|
||||
};
|
||||
|
||||
struct amdgpu_nbio_funcs {
|
||||
const struct nbio_hdp_flush_reg *hdp_flush_reg;
|
||||
u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
|
||||
|
@ -666,6 +674,7 @@ struct amdgpu_nbio_funcs {
|
|||
void (*ih_control)(struct amdgpu_device *adev);
|
||||
void (*init_registers)(struct amdgpu_device *adev);
|
||||
void (*detect_hw_virt)(struct amdgpu_device *adev);
|
||||
void (*remap_hdp_registers)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_df_funcs {
|
||||
|
@ -680,6 +689,12 @@ struct amdgpu_df_funcs {
|
|||
u32 *flags);
|
||||
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
|
||||
bool enable);
|
||||
int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
|
||||
int is_enable);
|
||||
int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
|
||||
int is_disable);
|
||||
void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
|
||||
uint64_t *count);
|
||||
};
|
||||
/* Define the HW IP blocks that will be used in the driver; add more if necessary */
|
||||
enum amd_hw_ip_block_type {
|
||||
|
@ -764,6 +779,7 @@ struct amdgpu_device {
|
|||
void __iomem *rmmio;
|
||||
/* protects concurrent MM_INDEX/DATA based register access */
|
||||
spinlock_t mmio_idx_lock;
|
||||
struct amdgpu_mmio_remap rmmio_remap;
|
||||
/* protects concurrent SMC based register access */
|
||||
spinlock_t smc_idx_lock;
|
||||
amdgpu_rreg_t smc_rreg;
|
||||
|
@ -936,6 +952,13 @@ struct amdgpu_device {
|
|||
struct work_struct xgmi_reset_work;
|
||||
|
||||
bool in_baco_reset;
|
||||
|
||||
long gfx_timeout;
|
||||
long sdma_timeout;
|
||||
long video_timeout;
|
||||
long compute_timeout;
|
||||
|
||||
uint64_t unique_id;
|
||||
};
|
||||
|
||||
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
|
||||
|
@ -1065,6 +1088,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
|
|||
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
|
||||
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
|
||||
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
|
||||
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
|
||||
|
||||
/* Common functions */
|
||||
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
|
||||
|
@ -1081,6 +1105,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
|
|||
const u32 array_size);
|
||||
|
||||
bool amdgpu_device_is_px(struct drm_device *dev);
|
||||
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
|
||||
struct amdgpu_device *peer_adev);
|
||||
|
||||
/* atpx handler */
|
||||
#if defined(CONFIG_VGA_SWITCHEROO)
|
||||
void amdgpu_register_atpx_handler(void);
|
||||
|
|
|
@ -25,8 +25,10 @@
|
|||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include "amdgpu_dma_buf.h"
|
||||
#include <linux/module.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include "amdgpu_xgmi.h"
|
||||
|
||||
static const unsigned int compute_vmid_bitmap = 0xFF00;
|
||||
|
||||
|
@ -148,7 +150,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
};
|
||||
|
||||
/* this is going to have a few of the MSBs set that we need to
|
||||
* clear */
|
||||
* clear
|
||||
*/
|
||||
bitmap_complement(gpu_resources.queue_bitmap,
|
||||
adev->gfx.mec.queue_bitmap,
|
||||
KGD_MAX_QUEUES);
|
||||
|
@ -162,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
gpu_resources.queue_bitmap);
|
||||
|
||||
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
|
||||
* nbits is not compile time constant */
|
||||
* nbits is not compile time constant
|
||||
*/
|
||||
last_valid_bit = 1 /* only first MEC can have compute queues */
|
||||
* adev->gfx.mec.num_pipe_per_mec
|
||||
* adev->gfx.mec.num_queue_per_pipe;
|
||||
|
@ -335,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
|
|||
amdgpu_bo_unref(&(bo));
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct amdgpu_bo *bo = NULL;
|
||||
struct amdgpu_bo_param bp;
|
||||
int r;
|
||||
|
||||
memset(&bp, 0, sizeof(bp));
|
||||
bp.size = size;
|
||||
bp.byte_align = 1;
|
||||
bp.domain = AMDGPU_GEM_DOMAIN_GWS;
|
||||
bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
|
||||
bp.type = ttm_bo_type_device;
|
||||
bp.resv = NULL;
|
||||
|
||||
r = amdgpu_bo_create(adev, &bp, &bo);
|
||||
if (r) {
|
||||
dev_err(adev->dev,
|
||||
"failed to allocate gws BO for amdkfd (%d)\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
*mem_obj = bo;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Drop one reference on a GWS buffer object.
 *
 * @kgd:     not used by this function
 * @mem_obj: interpreted as a struct amdgpu_bo pointer (presumably the object
 *           handed out by amdgpu_amdkfd_alloc_gws() - confirm with callers)
 */
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *gws_bo = mem_obj;

	amdgpu_bo_unref(&gws_bo);
}
|
||||
|
||||
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
|
||||
enum kgd_engine_type type)
|
||||
{
|
||||
|
@ -518,6 +556,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
|
|||
|
||||
return adev->gmc.xgmi.hive_id;
|
||||
}
|
||||
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
|
||||
{
|
||||
struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)dst;
|
||||
int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
|
||||
|
||||
if (ret < 0) {
|
||||
DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
|
||||
adev->gmc.xgmi.physical_node_id,
|
||||
peer_adev->gmc.xgmi.physical_node_id, ret);
|
||||
ret = 0;
|
||||
}
|
||||
return (uint8_t)ret;
|
||||
}
|
||||
|
||||
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
|
||||
return adev->rmmio_remap.bus_addr;
|
||||
}
|
||||
|
||||
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
|
||||
return adev->gds.gws_size;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
|
||||
uint32_t vmid, uint64_t gpu_addr,
|
||||
|
|
|
@ -61,7 +61,6 @@ struct kgd_mem {
|
|||
|
||||
atomic_t invalid;
|
||||
struct amdkfd_process_info *process_info;
|
||||
struct page **user_pages;
|
||||
|
||||
struct amdgpu_sync sync;
|
||||
|
||||
|
@ -154,6 +153,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
|||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr, bool mqd_gfx9);
|
||||
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
|
||||
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
|
||||
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
|
||||
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
|
||||
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
|
||||
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
|
||||
enum kgd_engine_type type);
|
||||
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
|
||||
|
@ -169,6 +172,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
|
|||
uint32_t *flags);
|
||||
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
|
||||
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
|
||||
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
|
||||
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
|
||||
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
|
||||
|
||||
#define read_user_wptr(mmptr, wptr, dst) \
|
||||
({ \
|
||||
|
|
|
@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
|
|||
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
|
||||
m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
|
||||
|
||||
pr_debug("kfd: sdma base address: 0x%x\n", retval);
|
||||
pr_debug("sdma base address: 0x%x\n", retval);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
|
|
@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
|
|||
|
||||
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
|
||||
m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
|
||||
pr_debug("kfd: sdma base address: 0x%x\n", retval);
|
||||
pr_debug("sdma base address: 0x%x\n", retval);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
|
|
@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
|
|||
|
||||
lock_srbm(kgd, 0, 0, 0, vmid);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
|
||||
/* APE1 no longer exists on GFX9 */
|
||||
|
||||
unlock_srbm(kgd);
|
||||
|
@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
|||
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
|
||||
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
|
||||
((mec << 5) | (pipe << 3) | queue_id | 0x80));
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
|
||||
}
|
||||
|
||||
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
|
||||
|
@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
|||
|
||||
for (reg = hqd_base;
|
||||
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
|
||||
WREG32(reg, mqd_hqd[reg - hqd_base]);
|
||||
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
|
||||
|
||||
|
||||
/* Activate doorbell logic before triggering WPTR poll. */
|
||||
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
|
||||
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
|
||||
|
||||
if (wptr) {
|
||||
/* Don't read wptr with get_user because the user
|
||||
|
@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
|
|||
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
|
||||
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
|
||||
lower_32_bits(guessed_wptr));
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
|
||||
upper_32_bits(guessed_wptr));
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
|
||||
lower_32_bits((uintptr_t)wptr));
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
|
||||
upper_32_bits((uintptr_t)wptr));
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
|
||||
get_queue_mask(adev, pipe_id, queue_id));
|
||||
}
|
||||
|
||||
/* Start the EOP fetcher */
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
|
||||
REG_SET_FIELD(m->cp_hqd_eop_rptr,
|
||||
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
|
||||
|
||||
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
|
||||
|
||||
release_queue(kgd);
|
||||
|
||||
|
@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
|||
acquire_queue(kgd, pipe_id, queue_id);
|
||||
|
||||
if (m->cp_hqd_vmid == 0)
|
||||
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
|
||||
WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
|
||||
|
||||
switch (reset_type) {
|
||||
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
|
||||
|
@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
|
|||
break;
|
||||
}
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
|
||||
|
||||
end_jiffies = (utimeout * HZ / 1000) + jiffies;
|
||||
while (true) {
|
||||
|
@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
|||
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
|
||||
}
|
||||
|
||||
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
|
||||
/* Use legacy mode tlb invalidation.
|
||||
*
|
||||
* Currently on Raven the code below is broken for anything but
|
||||
* legacy mode due to a MMHUB power gating problem. A workaround
|
||||
* is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
|
||||
* == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
|
||||
* bit.
|
||||
*
|
||||
* TODO 1: agree on the right set of invalidation registers for
|
||||
* KFD use. Use the last one for now. Invalidate both GC and
|
||||
* MMHUB.
|
||||
*
|
||||
* TODO 2: support range-based invalidation, requires kfd2kgd
|
||||
* interface change
|
||||
*/
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
|
||||
}
|
||||
|
||||
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
|
||||
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
|
||||
uint32_t flush_type)
|
||||
{
|
||||
signed long r;
|
||||
uint32_t seq;
|
||||
|
@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
|
|||
PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
|
||||
PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
|
||||
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
|
||||
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
|
||||
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
|
||||
amdgpu_fence_emit_polling(ring, &seq);
|
||||
amdgpu_ring_commit(ring);
|
||||
spin_unlock(&adev->gfx.kiq.ring_lock);
|
||||
|
@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
|||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
int vmid;
|
||||
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
|
||||
uint32_t flush_type = 0;
|
||||
|
||||
if (adev->in_gpu_reset)
|
||||
return -EIO;
|
||||
if (adev->gmc.xgmi.num_physical_nodes &&
|
||||
adev->asic_type == CHIP_VEGA20)
|
||||
flush_type = 2;
|
||||
|
||||
if (ring->sched.ready)
|
||||
return invalidate_tlbs_with_kiq(adev, pasid);
|
||||
return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
|
||||
|
||||
for (vmid = 0; vmid < 16; vmid++) {
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
|
||||
|
@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
|||
if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
|
||||
if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
|
||||
== pasid) {
|
||||
write_vmid_invalidate_request(kgd, vmid);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid,
|
||||
flush_type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
|
|||
return 0;
|
||||
}
|
||||
|
||||
write_vmid_invalidate_request(kgd, vmid);
|
||||
/* Use legacy mode tlb invalidation.
|
||||
*
|
||||
* Currently on Raven the code below is broken for anything but
|
||||
* legacy mode due to a MMHUB power gating problem. A workaround
|
||||
* is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
|
||||
* == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
|
||||
* bit.
|
||||
*
|
||||
* TODO 1: agree on the right set of invalidation registers for
|
||||
* KFD use. Use the last one for now. Invalidate both GC and
|
||||
* MMHUB.
|
||||
*
|
||||
* TODO 2: support range-based invalidation, requires kfd2kgd
|
||||
* interface change
|
||||
*/
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
|
|||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
|
||||
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
|
||||
|
||||
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
|
||||
|
@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
|
|||
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
|
||||
SE_BROADCAST_WRITES, 1);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
|
||||
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "amdgpu_object.h"
|
||||
#include "amdgpu_vm.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "amdgpu_dma_buf.h"
|
||||
|
||||
/* Special VM and GART address alignment needed for VI pre-Fiji due to
|
||||
* a HW bug.
|
||||
|
@ -456,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
|
|||
mutex_unlock(&process_info->lock);
|
||||
}
|
||||
|
||||
static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
|
||||
struct amdkfd_process_info *process_info)
|
||||
{
|
||||
struct ttm_validate_buffer *bo_list_entry;
|
||||
|
||||
bo_list_entry = &mem->validate_list;
|
||||
mutex_lock(&process_info->lock);
|
||||
list_del(&bo_list_entry->head);
|
||||
mutex_unlock(&process_info->lock);
|
||||
}
|
||||
|
||||
/* Initializes user pages. It registers the MMU notifier and validates
|
||||
* the userptr BO in the GTT domain.
|
||||
*
|
||||
|
@ -491,28 +503,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
|
|||
goto out;
|
||||
}
|
||||
|
||||
/* If no restore worker is running concurrently, user_pages
|
||||
* should not be allocated
|
||||
*/
|
||||
WARN(mem->user_pages, "Leaking user_pages array");
|
||||
|
||||
mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
|
||||
sizeof(struct page *),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!mem->user_pages) {
|
||||
pr_err("%s: Failed to allocate pages array\n", __func__);
|
||||
ret = -ENOMEM;
|
||||
goto unregister_out;
|
||||
}
|
||||
|
||||
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
|
||||
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
|
||||
if (ret) {
|
||||
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
|
||||
goto free_out;
|
||||
goto unregister_out;
|
||||
}
|
||||
|
||||
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
|
||||
|
||||
ret = amdgpu_bo_reserve(bo, true);
|
||||
if (ret) {
|
||||
pr_err("%s: Failed to reserve BO\n", __func__);
|
||||
|
@ -525,11 +521,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
|
|||
amdgpu_bo_unreserve(bo);
|
||||
|
||||
release_out:
|
||||
if (ret)
|
||||
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
|
||||
free_out:
|
||||
kvfree(mem->user_pages);
|
||||
mem->user_pages = NULL;
|
||||
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||
unregister_out:
|
||||
if (ret)
|
||||
amdgpu_mn_unregister(bo);
|
||||
|
@ -588,7 +580,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
|
|||
ctx->kfd_bo.priority = 0;
|
||||
ctx->kfd_bo.tv.bo = &bo->tbo;
|
||||
ctx->kfd_bo.tv.num_shared = 1;
|
||||
ctx->kfd_bo.user_pages = NULL;
|
||||
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
||||
|
||||
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
|
||||
|
@ -652,7 +643,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
|
|||
ctx->kfd_bo.priority = 0;
|
||||
ctx->kfd_bo.tv.bo = &bo->tbo;
|
||||
ctx->kfd_bo.tv.num_shared = 1;
|
||||
ctx->kfd_bo.user_pages = NULL;
|
||||
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
||||
|
||||
i = 0;
|
||||
|
@ -896,6 +886,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
|
|||
AMDGPU_FENCE_OWNER_KFD, false);
|
||||
if (ret)
|
||||
goto wait_pd_fail;
|
||||
ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
|
||||
if (ret)
|
||||
goto reserve_shared_fail;
|
||||
amdgpu_bo_fence(vm->root.base.bo,
|
||||
&vm->process_info->eviction_fence->base, true);
|
||||
amdgpu_bo_unreserve(vm->root.base.bo);
|
||||
|
@ -909,6 +902,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
|
|||
|
||||
return 0;
|
||||
|
||||
reserve_shared_fail:
|
||||
wait_pd_fail:
|
||||
validate_pd_fail:
|
||||
amdgpu_bo_unreserve(vm->root.base.bo);
|
||||
|
@ -1109,7 +1103,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
if (!offset || !*offset)
|
||||
return -EINVAL;
|
||||
user_addr = *offset;
|
||||
} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
|
||||
} else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
|
||||
ALLOC_MEM_FLAGS_MMIO_REMAP)) {
|
||||
domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
|
||||
bo_type = ttm_bo_type_sg;
|
||||
|
@ -1199,12 +1194,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
|
||||
if (user_addr) {
|
||||
ret = init_user_pages(*mem, current->mm, user_addr);
|
||||
if (ret) {
|
||||
mutex_lock(&avm->process_info->lock);
|
||||
list_del(&(*mem)->validate_list.head);
|
||||
mutex_unlock(&avm->process_info->lock);
|
||||
if (ret)
|
||||
goto allocate_init_user_pages_failed;
|
||||
}
|
||||
}
|
||||
|
||||
if (offset)
|
||||
|
@ -1213,6 +1204,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
return 0;
|
||||
|
||||
allocate_init_user_pages_failed:
|
||||
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
|
||||
amdgpu_bo_unref(&bo);
|
||||
/* Don't unreserve system mem limit twice */
|
||||
goto err_reserve_limit;
|
||||
|
@ -1262,15 +1254,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
|||
list_del(&bo_list_entry->head);
|
||||
mutex_unlock(&process_info->lock);
|
||||
|
||||
/* Free user pages if necessary */
|
||||
if (mem->user_pages) {
|
||||
pr_debug("%s: Freeing user_pages array\n", __func__);
|
||||
if (mem->user_pages[0])
|
||||
release_pages(mem->user_pages,
|
||||
mem->bo->tbo.ttm->num_pages);
|
||||
kvfree(mem->user_pages);
|
||||
}
|
||||
|
||||
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
@ -1294,8 +1277,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
|||
/* Free the sync object */
|
||||
amdgpu_sync_free(&mem->sync);
|
||||
|
||||
/* If the SG is not NULL, it's one we created for a doorbell
|
||||
* BO. We need to free it.
|
||||
/* If the SG is not NULL, it's one we created for a doorbell or mmio
|
||||
* remap BO. We need to free it.
|
||||
*/
|
||||
if (mem->bo->tbo.sg) {
|
||||
sg_free_table(mem->bo->tbo.sg);
|
||||
|
@ -1409,7 +1392,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
|
|||
ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
|
||||
is_invalid_userptr);
|
||||
if (ret) {
|
||||
pr_err("Failed to map radeon bo to gpuvm\n");
|
||||
pr_err("Failed to map bo to gpuvm\n");
|
||||
goto map_bo_to_gpuvm_failed;
|
||||
}
|
||||
|
||||
|
@ -1744,25 +1727,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
|
|||
|
||||
bo = mem->bo;
|
||||
|
||||
if (!mem->user_pages) {
|
||||
mem->user_pages =
|
||||
kvmalloc_array(bo->tbo.ttm->num_pages,
|
||||
sizeof(struct page *),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!mem->user_pages) {
|
||||
pr_err("%s: Failed to allocate pages array\n",
|
||||
__func__);
|
||||
return -ENOMEM;
|
||||
}
|
||||
} else if (mem->user_pages[0]) {
|
||||
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
|
||||
}
|
||||
|
||||
/* Get updated user pages */
|
||||
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
|
||||
mem->user_pages);
|
||||
bo->tbo.ttm->pages);
|
||||
if (ret) {
|
||||
mem->user_pages[0] = NULL;
|
||||
bo->tbo.ttm->pages[0] = NULL;
|
||||
pr_info("%s: Failed to get user pages: %d\n",
|
||||
__func__, ret);
|
||||
/* Pretend it succeeded. It will fail later
|
||||
|
@ -1771,17 +1740,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
|
|||
* stalled user mode queues.
|
||||
*/
|
||||
}
|
||||
|
||||
/* Mark the BO as valid unless it was invalidated
|
||||
* again concurrently
|
||||
*/
|
||||
if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove invalid userptr BOs from hmm track list
|
||||
*
|
||||
* Stop HMM track the userptr update
|
||||
*/
|
||||
static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info)
|
||||
{
|
||||
struct kgd_mem *mem, *tmp_mem;
|
||||
struct amdgpu_bo *bo;
|
||||
|
||||
list_for_each_entry_safe(mem, tmp_mem,
|
||||
&process_info->userptr_inval_list,
|
||||
validate_list.head) {
|
||||
bo = mem->bo;
|
||||
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate invalid userptr BOs
|
||||
*
|
||||
* Validates BOs on the userptr_inval_list, and moves them back to the
|
||||
|
@ -1806,7 +1786,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
|||
GFP_KERNEL);
|
||||
if (!pd_bo_list_entries) {
|
||||
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out_no_mem;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&resv_list);
|
||||
|
@ -1830,7 +1811,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
|||
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
|
||||
WARN(!list_empty(&duplicates), "Duplicates should be empty");
|
||||
if (ret)
|
||||
goto out;
|
||||
goto out_free;
|
||||
|
||||
amdgpu_sync_create(&sync);
|
||||
|
||||
|
@ -1846,10 +1827,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
|||
|
||||
bo = mem->bo;
|
||||
|
||||
/* Copy pages array and validate the BO if we got user pages */
|
||||
if (mem->user_pages[0]) {
|
||||
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
|
||||
mem->user_pages);
|
||||
/* Validate the BO if we got user pages */
|
||||
if (bo->tbo.ttm->pages[0]) {
|
||||
amdgpu_bo_placement_from_domain(bo, mem->domain);
|
||||
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
if (ret) {
|
||||
|
@ -1858,16 +1837,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
|||
}
|
||||
}
|
||||
|
||||
/* Validate succeeded, now the BO owns the pages, free
|
||||
* our copy of the pointer array. Put this BO back on
|
||||
* the userptr_valid_list. If we need to revalidate
|
||||
* it, we need to start from scratch.
|
||||
*/
|
||||
kvfree(mem->user_pages);
|
||||
mem->user_pages = NULL;
|
||||
list_move_tail(&mem->validate_list.head,
|
||||
&process_info->userptr_valid_list);
|
||||
|
||||
/* Stop HMM tracking of the userptr update. We don't check the return
|
||||
* value for concurrent CPU page table update because we will
|
||||
* reschedule the restore worker if process_info->evicted_bos
|
||||
* is updated.
|
||||
*/
|
||||
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||
|
||||
/* Update mapping. If the BO was not validated
|
||||
* (because we couldn't get user pages), this will
|
||||
* clear the page table entries, which will result in
|
||||
|
@ -1897,8 +1876,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
|||
ttm_eu_backoff_reservation(&ticket, &resv_list);
|
||||
amdgpu_sync_wait(&sync, false);
|
||||
amdgpu_sync_free(&sync);
|
||||
out:
|
||||
out_free:
|
||||
kfree(pd_bo_list_entries);
|
||||
out_no_mem:
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -1963,7 +1943,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
|
|||
* hanging. No point trying again.
|
||||
*/
|
||||
}
|
||||
|
||||
unlock_out:
|
||||
untrack_invalid_user_pages(process_info);
|
||||
mutex_unlock(&process_info->lock);
|
||||
mmput(mm);
|
||||
put_task_struct(usertask);
|
||||
|
@ -2130,3 +2112,88 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
|||
kfree(pd_bo_list);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
|
||||
{
|
||||
struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
|
||||
struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
|
||||
int ret;
|
||||
|
||||
if (!info || !gws)
|
||||
return -EINVAL;
|
||||
|
||||
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
|
||||
if (!*mem)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_init(&(*mem)->lock);
|
||||
(*mem)->bo = amdgpu_bo_ref(gws_bo);
|
||||
(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
|
||||
(*mem)->process_info = process_info;
|
||||
add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
|
||||
amdgpu_sync_create(&(*mem)->sync);
|
||||
|
||||
|
||||
/* Validate gws bo the first time it is added to process */
|
||||
mutex_lock(&(*mem)->process_info->lock);
|
||||
ret = amdgpu_bo_reserve(gws_bo, false);
|
||||
if (unlikely(ret)) {
|
||||
pr_err("Reserve gws bo failed %d\n", ret);
|
||||
goto bo_reservation_failure;
|
||||
}
|
||||
|
||||
ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
|
||||
if (ret) {
|
||||
pr_err("GWS BO validate failed %d\n", ret);
|
||||
goto bo_validation_failure;
|
||||
}
|
||||
/* GWS resource is shared b/t amdgpu and amdkfd
|
||||
* Add process eviction fence to bo so they can
|
||||
* evict each other.
|
||||
*/
|
||||
amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
|
||||
amdgpu_bo_unreserve(gws_bo);
|
||||
mutex_unlock(&(*mem)->process_info->lock);
|
||||
|
||||
return ret;
|
||||
|
||||
bo_validation_failure:
|
||||
amdgpu_bo_unreserve(gws_bo);
|
||||
bo_reservation_failure:
|
||||
mutex_unlock(&(*mem)->process_info->lock);
|
||||
amdgpu_sync_free(&(*mem)->sync);
|
||||
remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
|
||||
amdgpu_bo_unref(&gws_bo);
|
||||
mutex_destroy(&(*mem)->lock);
|
||||
kfree(*mem);
|
||||
*mem = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
|
||||
{
|
||||
int ret;
|
||||
struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
|
||||
struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
|
||||
struct amdgpu_bo *gws_bo = kgd_mem->bo;
|
||||
|
||||
/* Remove BO from process's validate list so restore worker won't touch
|
||||
* it anymore
|
||||
*/
|
||||
remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
|
||||
|
||||
ret = amdgpu_bo_reserve(gws_bo, false);
|
||||
if (unlikely(ret)) {
|
||||
pr_err("Reserve gws bo failed %d\n", ret);
|
||||
//TODO add BO back to validate_list?
|
||||
return ret;
|
||||
}
|
||||
amdgpu_amdkfd_remove_eviction_fence(gws_bo,
|
||||
process_info->eviction_fence);
|
||||
amdgpu_bo_unreserve(gws_bo);
|
||||
amdgpu_sync_free(&kgd_mem->sync);
|
||||
amdgpu_bo_unref(&gws_bo);
|
||||
mutex_destroy(&kgd_mem->lock);
|
||||
kfree(mem);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -81,9 +81,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
|
|||
return -ENOMEM;
|
||||
|
||||
kref_init(&list->refcount);
|
||||
list->gds_obj = adev->gds.gds_gfx_bo;
|
||||
list->gws_obj = adev->gds.gws_gfx_bo;
|
||||
list->oa_obj = adev->gds.oa_gfx_bo;
|
||||
list->gds_obj = NULL;
|
||||
list->gws_obj = NULL;
|
||||
list->oa_obj = NULL;
|
||||
|
||||
array = amdgpu_bo_list_array_entry(list, 0);
|
||||
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
|
||||
|
|
|
@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
|
|||
struct amdgpu_bo_va *bo_va;
|
||||
uint32_t priority;
|
||||
struct page **user_pages;
|
||||
int user_invalidated;
|
||||
bool user_invalidated;
|
||||
};
|
||||
|
||||
struct amdgpu_bo_list {
|
||||
|
|
|
@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
|
|||
p->uf_entry.tv.bo = &bo->tbo;
|
||||
/* One for TTM and one for the CS job */
|
||||
p->uf_entry.tv.num_shared = 2;
|
||||
p->uf_entry.user_pages = NULL;
|
||||
|
||||
drm_gem_object_put_unlocked(gobj);
|
||||
|
||||
|
@ -542,14 +541,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
|
|||
if (usermm && usermm != current->mm)
|
||||
return -EPERM;
|
||||
|
||||
/* Check if we have user pages and nobody bound the BO already */
|
||||
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
|
||||
lobj->user_pages) {
|
||||
if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
|
||||
lobj->user_invalidated && lobj->user_pages) {
|
||||
amdgpu_bo_placement_from_domain(bo,
|
||||
AMDGPU_GEM_DOMAIN_CPU);
|
||||
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
|
||||
lobj->user_pages);
|
||||
binding_userptr = true;
|
||||
|
@ -580,7 +579,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
|||
struct amdgpu_bo *gds;
|
||||
struct amdgpu_bo *gws;
|
||||
struct amdgpu_bo *oa;
|
||||
unsigned tries = 10;
|
||||
int r;
|
||||
|
||||
INIT_LIST_HEAD(&p->validated);
|
||||
|
@ -616,79 +614,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
|||
if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
|
||||
list_add(&p->uf_entry.tv.head, &p->validated);
|
||||
|
||||
while (1) {
|
||||
struct list_head need_pages;
|
||||
/* Get userptr backing pages. If pages are updated after registered
|
||||
* in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
|
||||
* amdgpu_ttm_backend_bind() to flush and invalidate new pages
|
||||
*/
|
||||
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
|
||||
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
|
||||
bool userpage_invalidated = false;
|
||||
int i;
|
||||
|
||||
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
|
||||
&duplicates);
|
||||
if (unlikely(r != 0)) {
|
||||
if (r != -ERESTARTSYS)
|
||||
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
|
||||
goto error_free_pages;
|
||||
e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
|
||||
sizeof(struct page *),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!e->user_pages) {
|
||||
DRM_ERROR("calloc failure\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&need_pages);
|
||||
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
|
||||
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
|
||||
|
||||
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
|
||||
&e->user_invalidated) && e->user_pages) {
|
||||
|
||||
/* We acquired a page array, but somebody
|
||||
* invalidated it. Free it and try again
|
||||
*/
|
||||
release_pages(e->user_pages,
|
||||
bo->tbo.ttm->num_pages);
|
||||
kvfree(e->user_pages);
|
||||
e->user_pages = NULL;
|
||||
}
|
||||
|
||||
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
|
||||
!e->user_pages) {
|
||||
list_del(&e->tv.head);
|
||||
list_add(&e->tv.head, &need_pages);
|
||||
|
||||
amdgpu_bo_unreserve(bo);
|
||||
}
|
||||
r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
|
||||
if (r) {
|
||||
kvfree(e->user_pages);
|
||||
e->user_pages = NULL;
|
||||
return r;
|
||||
}
|
||||
|
||||
if (list_empty(&need_pages))
|
||||
break;
|
||||
|
||||
/* Unreserve everything again. */
|
||||
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
|
||||
|
||||
/* We tried too many times, just abort */
|
||||
if (!--tries) {
|
||||
r = -EDEADLK;
|
||||
DRM_ERROR("deadlock in %s\n", __func__);
|
||||
goto error_free_pages;
|
||||
}
|
||||
|
||||
/* Fill the page arrays for all userptrs. */
|
||||
list_for_each_entry(e, &need_pages, tv.head) {
|
||||
struct ttm_tt *ttm = e->tv.bo->ttm;
|
||||
|
||||
e->user_pages = kvmalloc_array(ttm->num_pages,
|
||||
sizeof(struct page*),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!e->user_pages) {
|
||||
r = -ENOMEM;
|
||||
DRM_ERROR("calloc failure in %s\n", __func__);
|
||||
goto error_free_pages;
|
||||
}
|
||||
|
||||
r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
|
||||
kvfree(e->user_pages);
|
||||
e->user_pages = NULL;
|
||||
goto error_free_pages;
|
||||
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
|
||||
if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
|
||||
userpage_invalidated = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
e->user_invalidated = userpage_invalidated;
|
||||
}
|
||||
|
||||
/* And try again. */
|
||||
list_splice(&need_pages, &p->validated);
|
||||
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
|
||||
&duplicates);
|
||||
if (unlikely(r != 0)) {
|
||||
if (r != -ERESTARTSYS)
|
||||
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
|
||||
|
@ -757,17 +721,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
|||
error_validate:
|
||||
if (r)
|
||||
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
|
||||
|
||||
error_free_pages:
|
||||
|
||||
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
|
||||
if (!e->user_pages)
|
||||
continue;
|
||||
|
||||
release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
|
||||
kvfree(e->user_pages);
|
||||
}
|
||||
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -1054,11 +1008,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
|
|||
j++;
|
||||
}
|
||||
|
||||
/* UVD & VCE fw doesn't support user fences */
|
||||
/* MM engine doesn't support user fences */
|
||||
ring = to_amdgpu_ring(parser->entity->rq->sched);
|
||||
if (parser->job->uf_addr && (
|
||||
ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
|
||||
ring->funcs->type == AMDGPU_RING_TYPE_VCE))
|
||||
if (parser->job->uf_addr && ring->funcs->no_user_fence)
|
||||
return -EINVAL;
|
||||
|
||||
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
|
||||
|
@ -1328,7 +1280,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
|||
struct amdgpu_bo_list_entry *e;
|
||||
struct amdgpu_job *job;
|
||||
uint64_t seq;
|
||||
|
||||
int r;
|
||||
|
||||
job = p->job;
|
||||
|
@ -1338,15 +1289,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
|||
if (r)
|
||||
goto error_unlock;
|
||||
|
||||
/* No memory allocation is allowed while holding the mn lock */
|
||||
/* No memory allocation is allowed while holding the mn lock.
|
||||
* p->mn is held until amdgpu_cs_submit is finished and fence is added
|
||||
* to BOs.
|
||||
*/
|
||||
amdgpu_mn_lock(p->mn);
|
||||
|
||||
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
|
||||
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
|
||||
*/
|
||||
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
|
||||
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
|
||||
|
||||
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
|
||||
r = -ERESTARTSYS;
|
||||
goto error_abort;
|
||||
}
|
||||
r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||
}
|
||||
if (r) {
|
||||
r = -EAGAIN;
|
||||
goto error_abort;
|
||||
}
|
||||
|
||||
job->owner = p->filp;
|
||||
|
@ -1442,6 +1401,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
|||
|
||||
out:
|
||||
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
|
|
@ -97,6 +97,28 @@ static const char *amdgpu_asic_name[] = {
|
|||
"LAST",
|
||||
};
|
||||
|
||||
/**
|
||||
* DOC: pcie_replay_count
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reporting the total number
|
||||
* of PCIe replays (NAKs)
|
||||
* The file pcie_replay_count is used for this and returns the total
|
||||
* number of replays as a sum of the NAKs generated and NAKs received
|
||||
*/
|
||||
|
||||
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
|
||||
amdgpu_device_get_pcie_replay_count, NULL);
|
||||
|
||||
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
|
||||
|
||||
/**
|
||||
|
@ -910,8 +932,10 @@ static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
|
|||
* Validates certain module parameters and updates
|
||||
* the associated values used by the driver (all asics).
|
||||
*/
|
||||
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
|
||||
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (amdgpu_sched_jobs < 4) {
|
||||
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
|
||||
amdgpu_sched_jobs);
|
||||
|
@ -956,12 +980,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
|
|||
amdgpu_vram_page_split = 1024;
|
||||
}
|
||||
|
||||
if (amdgpu_lockup_timeout == 0) {
|
||||
dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
|
||||
amdgpu_lockup_timeout = 10000;
|
||||
ret = amdgpu_device_get_job_timeout_settings(adev);
|
||||
if (ret) {
|
||||
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1505,12 +1532,26 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
|
|||
r = amdgpu_virt_request_full_gpu(adev, true);
|
||||
if (r)
|
||||
return -EAGAIN;
|
||||
|
||||
/* query the reg access mode at the very beginning */
|
||||
amdgpu_virt_init_reg_access_mode(adev);
|
||||
}
|
||||
|
||||
adev->pm.pp_feature = amdgpu_pp_feature_mask;
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
|
||||
|
||||
/* Read BIOS */
|
||||
if (!amdgpu_get_bios(adev))
|
||||
return -EINVAL;
|
||||
|
||||
r = amdgpu_atombios_init(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
|
||||
return r;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->num_ip_blocks; i++) {
|
||||
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
|
||||
DRM_ERROR("disabled ip block: %d <%s>\n",
|
||||
|
@ -1550,6 +1591,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
|
|||
if (adev->ip_blocks[i].status.hw)
|
||||
continue;
|
||||
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
|
||||
(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
|
||||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
|
||||
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
|
||||
if (r) {
|
||||
|
@ -2473,7 +2515,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
mutex_init(&adev->lock_reset);
|
||||
mutex_init(&adev->virt.dpm_mutex);
|
||||
|
||||
amdgpu_device_check_arguments(adev);
|
||||
r = amdgpu_device_check_arguments(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
spin_lock_init(&adev->mmio_idx_lock);
|
||||
spin_lock_init(&adev->smc_idx_lock);
|
||||
|
@ -2558,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
goto fence_driver_init;
|
||||
}
|
||||
|
||||
/* Read BIOS */
|
||||
if (!amdgpu_get_bios(adev)) {
|
||||
r = -EINVAL;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
r = amdgpu_atombios_init(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
|
||||
goto failed;
|
||||
}
|
||||
|
||||
/* detect if we are with an SRIOV vbios */
|
||||
amdgpu_device_detect_sriov_bios(adev);
|
||||
|
||||
|
@ -2672,6 +2703,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
if (r)
|
||||
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
|
||||
|
||||
r = amdgpu_ucode_sysfs_init(adev);
|
||||
if (r)
|
||||
DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
|
||||
|
||||
r = amdgpu_debugfs_gem_init(adev);
|
||||
if (r)
|
||||
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
|
||||
|
@ -2712,7 +2747,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
}
|
||||
|
||||
/* must succeed. */
|
||||
amdgpu_ras_post_init(adev);
|
||||
amdgpu_ras_resume(adev);
|
||||
|
||||
r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "Could not create pcie_replay_count");
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -2777,6 +2818,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
|
|||
adev->rmmio = NULL;
|
||||
amdgpu_device_doorbell_fini(adev);
|
||||
amdgpu_debugfs_regs_cleanup(adev);
|
||||
device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
|
||||
amdgpu_ucode_sysfs_fini(adev);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2857,6 +2900,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
|
|||
|
||||
amdgpu_amdkfd_suspend(adev);
|
||||
|
||||
amdgpu_ras_suspend(adev);
|
||||
|
||||
r = amdgpu_device_ip_suspend_phase1(adev);
|
||||
|
||||
/* evict vram memory */
|
||||
|
@ -2977,6 +3022,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
|
|||
|
||||
drm_kms_helper_poll_enable(dev);
|
||||
|
||||
amdgpu_ras_resume(adev);
|
||||
|
||||
/*
|
||||
* Most of the connector probing functions try to acquire runtime pm
|
||||
* refs to ensure that the GPU is powered on when connector polling is
|
||||
|
@ -3455,6 +3502,13 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
|
|||
if (vram_lost)
|
||||
amdgpu_device_fill_reset_magic(tmp_adev);
|
||||
|
||||
r = amdgpu_device_ip_late_init(tmp_adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
/* must succeed. */
|
||||
amdgpu_ras_resume(tmp_adev);
|
||||
|
||||
/* Update PSP FW topology after reset */
|
||||
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||
|
@ -3695,43 +3749,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
|||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
|
||||
enum pci_bus_speed *speed,
|
||||
enum pcie_link_width *width)
|
||||
{
|
||||
struct pci_dev *pdev = adev->pdev;
|
||||
enum pci_bus_speed cur_speed;
|
||||
enum pcie_link_width cur_width;
|
||||
u32 ret = 1;
|
||||
|
||||
*speed = PCI_SPEED_UNKNOWN;
|
||||
*width = PCIE_LNK_WIDTH_UNKNOWN;
|
||||
|
||||
while (pdev) {
|
||||
cur_speed = pcie_get_speed_cap(pdev);
|
||||
cur_width = pcie_get_width_cap(pdev);
|
||||
ret = pcie_bandwidth_available(adev->pdev, NULL,
|
||||
NULL, &cur_width);
|
||||
if (!ret)
|
||||
cur_width = PCIE_LNK_WIDTH_RESRV;
|
||||
|
||||
if (cur_speed != PCI_SPEED_UNKNOWN) {
|
||||
if (*speed == PCI_SPEED_UNKNOWN)
|
||||
*speed = cur_speed;
|
||||
else if (cur_speed < *speed)
|
||||
*speed = cur_speed;
|
||||
}
|
||||
|
||||
if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
|
||||
if (*width == PCIE_LNK_WIDTH_UNKNOWN)
|
||||
*width = cur_width;
|
||||
else if (cur_width < *width)
|
||||
*width = cur_width;
|
||||
}
|
||||
pdev = pci_upstream_bridge(pdev);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
|
||||
*
|
||||
|
@ -3765,8 +3782,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
|
|||
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
|
||||
return;
|
||||
|
||||
amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
|
||||
&platform_link_width);
|
||||
pcie_bandwidth_available(adev->pdev, NULL,
|
||||
&platform_speed_cap, &platform_link_width);
|
||||
|
||||
if (adev->pm.pcie_gen_mask == 0) {
|
||||
/* asic caps */
|
||||
|
|
|
@ -631,10 +631,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
|
|||
amdgpu_dither_enum_list, sz);
|
||||
|
||||
if (amdgpu_device_has_dc_support(adev)) {
|
||||
adev->mode_info.max_bpc_property =
|
||||
drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
|
||||
if (!adev->mode_info.max_bpc_property)
|
||||
return -ENOMEM;
|
||||
adev->mode_info.abm_level_property =
|
||||
drm_property_create_range(adev->ddev, 0,
|
||||
"abm level", 0, 4);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright 2012 Advanced Micro Devices, Inc.
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
@ -103,7 +103,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
|
|||
* Returns:
|
||||
* 0 on success or a negative error code on failure.
|
||||
*/
|
||||
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
|
||||
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
|
@ -137,6 +138,235 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
__reservation_object_make_exclusive(struct reservation_object *obj)
|
||||
{
|
||||
struct dma_fence **fences;
|
||||
unsigned int count;
|
||||
int r;
|
||||
|
||||
if (!reservation_object_get_list(obj)) /* no shared fences to convert */
|
||||
return 0;
|
||||
|
||||
r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (count == 0) {
|
||||
/* Now that was unexpected. */
|
||||
} else if (count == 1) {
|
||||
reservation_object_add_excl_fence(obj, fences[0]);
|
||||
dma_fence_put(fences[0]);
|
||||
kfree(fences);
|
||||
} else {
|
||||
struct dma_fence_array *array;
|
||||
|
||||
array = dma_fence_array_create(count, fences,
|
||||
dma_fence_context_alloc(1), 0,
|
||||
false);
|
||||
if (!array)
|
||||
goto err_fences_put;
|
||||
|
||||
reservation_object_add_excl_fence(obj, &array->base);
|
||||
dma_fence_put(&array->base);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_fences_put:
|
||||
while (count--)
|
||||
dma_fence_put(fences[count]);
|
||||
kfree(fences);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
|
||||
* @dma_buf: Shared DMA buffer
|
||||
* @attach: DMA-buf attachment
|
||||
*
|
||||
* Makes sure that the shared DMA buffer can be accessed by the target device.
|
||||
* For now, simply pins it to the GTT domain, where it should be accessible by
|
||||
* all DMA devices.
|
||||
*
|
||||
* Returns:
|
||||
* 0 on success or a negative error code on failure.
|
||||
*/
|
||||
static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
|
||||
struct dma_buf_attachment *attach)
|
||||
{
|
||||
struct drm_gem_object *obj = dma_buf->priv;
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
long r;
|
||||
|
||||
r = drm_gem_map_attach(dma_buf, attach);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_bo_reserve(bo, false);
|
||||
if (unlikely(r != 0))
|
||||
goto error_detach;
|
||||
|
||||
|
||||
if (attach->dev->driver != adev->dev->driver) {
|
||||
/*
|
||||
* We only create shared fences for internal use, but importers
|
||||
* of the dmabuf rely on exclusive fences for implicitly
|
||||
* tracking write hazards. As any of the current fences may
|
||||
* correspond to a write, we need to convert all existing
|
||||
* fences on the reservation object into a single exclusive
|
||||
* fence.
|
||||
*/
|
||||
r = __reservation_object_make_exclusive(bo->tbo.resv);
|
||||
if (r)
|
||||
goto error_unreserve;
|
||||
}
|
||||
|
||||
/* pin buffer into GTT */
|
||||
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||
if (r)
|
||||
goto error_unreserve;
|
||||
|
||||
if (attach->dev->driver != adev->dev->driver)
|
||||
bo->prime_shared_count++;
|
||||
|
||||
error_unreserve:
|
||||
amdgpu_bo_unreserve(bo);
|
||||
|
||||
error_detach:
|
||||
if (r)
|
||||
drm_gem_map_detach(dma_buf, attach);
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
|
||||
* @dma_buf: Shared DMA buffer
|
||||
* @attach: DMA-buf attachment
|
||||
*
|
||||
* This is called when a shared DMA buffer no longer needs to be accessible by
|
||||
* another device. For now, simply unpins the buffer from GTT.
|
||||
*/
|
||||
static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
|
||||
struct dma_buf_attachment *attach)
|
||||
{
|
||||
struct drm_gem_object *obj = dma_buf->priv;
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
int ret = 0;
|
||||
|
||||
ret = amdgpu_bo_reserve(bo, true);
|
||||
if (unlikely(ret != 0))
|
||||
goto error;
|
||||
|
||||
amdgpu_bo_unpin(bo);
|
||||
if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
|
||||
bo->prime_shared_count--;
|
||||
amdgpu_bo_unreserve(bo);
|
||||
|
||||
error:
|
||||
drm_gem_map_detach(dma_buf, attach);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
|
||||
* @obj: GEM BO
|
||||
*
|
||||
* Returns:
|
||||
* The BO's reservation object.
|
||||
*/
|
||||
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
|
||||
{
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||
|
||||
return bo->tbo.resv;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
|
||||
* @dma_buf: Shared DMA buffer
|
||||
* @direction: Direction of DMA transfer
|
||||
*
|
||||
* This is called before CPU access to the shared DMA buffer's memory. If it's
|
||||
* a read access, the buffer is moved to the GTT domain if possible, for optimal
|
||||
* CPU read performance.
|
||||
*
|
||||
* Returns:
|
||||
* 0 on success or a negative error code on failure.
|
||||
*/
|
||||
static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
|
||||
enum dma_data_direction direction)
|
||||
{
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
struct ttm_operation_ctx ctx = { true, false };
|
||||
u32 domain = amdgpu_display_supported_domains(adev);
|
||||
int ret;
|
||||
bool reads = (direction == DMA_BIDIRECTIONAL ||
|
||||
direction == DMA_FROM_DEVICE);
|
||||
|
||||
if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
|
||||
return 0;
|
||||
|
||||
/* move to gtt */
|
||||
ret = amdgpu_bo_reserve(bo, false);
|
||||
if (unlikely(ret != 0))
|
||||
return ret;
|
||||
|
||||
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
}
|
||||
|
||||
amdgpu_bo_unreserve(bo);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const struct dma_buf_ops amdgpu_dmabuf_ops = {
|
||||
.attach = amdgpu_dma_buf_map_attach,
|
||||
.detach = amdgpu_dma_buf_map_detach,
|
||||
.map_dma_buf = drm_gem_map_dma_buf,
|
||||
.unmap_dma_buf = drm_gem_unmap_dma_buf,
|
||||
.release = drm_gem_dmabuf_release,
|
||||
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
|
||||
.mmap = drm_gem_dmabuf_mmap,
|
||||
.vmap = drm_gem_dmabuf_vmap,
|
||||
.vunmap = drm_gem_dmabuf_vunmap,
|
||||
};
|
||||
|
||||
/**
|
||||
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
|
||||
* @dev: DRM device
|
||||
* @gobj: GEM BO
|
||||
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
|
||||
*
|
||||
* The main work is done by the &drm_gem_prime_export helper, which in turn
|
||||
* uses &amdgpu_gem_prime_res_obj.
|
||||
*
|
||||
* Returns:
|
||||
* Shared DMA buffer representing the GEM BO from the given device.
|
||||
*/
|
||||
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
|
||||
struct drm_gem_object *gobj,
|
||||
int flags)
|
||||
{
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
|
||||
struct dma_buf *buf;
|
||||
|
||||
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
|
||||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
buf = drm_gem_prime_export(dev, gobj, flags);
|
||||
if (!IS_ERR(buf)) {
|
||||
buf->file->f_mapping = dev->anon_inode->i_mapping;
|
||||
buf->ops = &amdgpu_dmabuf_ops;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
|
||||
* implementation
|
||||
|
@ -188,235 +418,6 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
|
|||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int
|
||||
__reservation_object_make_exclusive(struct reservation_object *obj)
|
||||
{
|
||||
struct dma_fence **fences;
|
||||
unsigned int count;
|
||||
int r;
|
||||
|
||||
if (!reservation_object_get_list(obj)) /* no shared fences to convert */
|
||||
return 0;
|
||||
|
||||
r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (count == 0) {
|
||||
/* Now that was unexpected. */
|
||||
} else if (count == 1) {
|
||||
reservation_object_add_excl_fence(obj, fences[0]);
|
||||
dma_fence_put(fences[0]);
|
||||
kfree(fences);
|
||||
} else {
|
||||
struct dma_fence_array *array;
|
||||
|
||||
array = dma_fence_array_create(count, fences,
|
||||
dma_fence_context_alloc(1), 0,
|
||||
false);
|
||||
if (!array)
|
||||
goto err_fences_put;
|
||||
|
||||
reservation_object_add_excl_fence(obj, &array->base);
|
||||
dma_fence_put(&array->base);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_fences_put:
|
||||
while (count--)
|
||||
dma_fence_put(fences[count]);
|
||||
kfree(fences);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
|
||||
* @dma_buf: Shared DMA buffer
|
||||
* @attach: DMA-buf attachment
|
||||
*
|
||||
* Makes sure that the shared DMA buffer can be accessed by the target device.
|
||||
* For now, simply pins it to the GTT domain, where it should be accessible by
|
||||
* all DMA devices.
|
||||
*
|
||||
* Returns:
|
||||
* 0 on success or a negative error code on failure.
|
||||
*/
|
||||
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
|
||||
struct dma_buf_attachment *attach)
|
||||
{
|
||||
struct drm_gem_object *obj = dma_buf->priv;
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
long r;
|
||||
|
||||
r = drm_gem_map_attach(dma_buf, attach);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_bo_reserve(bo, false);
|
||||
if (unlikely(r != 0))
|
||||
goto error_detach;
|
||||
|
||||
|
||||
if (attach->dev->driver != adev->dev->driver) {
|
||||
/*
|
||||
* We only create shared fences for internal use, but importers
|
||||
* of the dmabuf rely on exclusive fences for implicitly
|
||||
* tracking write hazards. As any of the current fences may
|
||||
* correspond to a write, we need to convert all existing
|
||||
* fences on the reservation object into a single exclusive
|
||||
* fence.
|
||||
*/
|
||||
r = __reservation_object_make_exclusive(bo->tbo.resv);
|
||||
if (r)
|
||||
goto error_unreserve;
|
||||
}
|
||||
|
||||
/* pin buffer into GTT */
|
||||
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||
if (r)
|
||||
goto error_unreserve;
|
||||
|
||||
if (attach->dev->driver != adev->dev->driver)
|
||||
bo->prime_shared_count++;
|
||||
|
||||
error_unreserve:
|
||||
amdgpu_bo_unreserve(bo);
|
||||
|
||||
error_detach:
|
||||
if (r)
|
||||
drm_gem_map_detach(dma_buf, attach);
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
|
||||
* @dma_buf: Shared DMA buffer
|
||||
* @attach: DMA-buf attachment
|
||||
*
|
||||
* This is called when a shared DMA buffer no longer needs to be accessible by
|
||||
* another device. For now, simply unpins the buffer from GTT.
|
||||
*/
|
||||
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
|
||||
struct dma_buf_attachment *attach)
|
||||
{
|
||||
struct drm_gem_object *obj = dma_buf->priv;
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
int ret = 0;
|
||||
|
||||
ret = amdgpu_bo_reserve(bo, true);
|
||||
if (unlikely(ret != 0))
|
||||
goto error;
|
||||
|
||||
amdgpu_bo_unpin(bo);
|
||||
if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
|
||||
bo->prime_shared_count--;
|
||||
amdgpu_bo_unreserve(bo);
|
||||
|
||||
error:
|
||||
drm_gem_map_detach(dma_buf, attach);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
|
||||
* @obj: GEM BO
|
||||
*
|
||||
* Returns:
|
||||
* The BO's reservation object.
|
||||
*/
|
||||
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
|
||||
{
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
|
||||
|
||||
return bo->tbo.resv;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
|
||||
* @dma_buf: Shared DMA buffer
|
||||
* @direction: Direction of DMA transfer
|
||||
*
|
||||
* This is called before CPU access to the shared DMA buffer's memory. If it's
|
||||
* a read access, the buffer is moved to the GTT domain if possible, for optimal
|
||||
* CPU read performance.
|
||||
*
|
||||
* Returns:
|
||||
* 0 on success or a negative error code on failure.
|
||||
*/
|
||||
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
|
||||
enum dma_data_direction direction)
|
||||
{
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
struct ttm_operation_ctx ctx = { true, false };
|
||||
u32 domain = amdgpu_display_supported_domains(adev);
|
||||
int ret;
|
||||
bool reads = (direction == DMA_BIDIRECTIONAL ||
|
||||
direction == DMA_FROM_DEVICE);
|
||||
|
||||
if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
|
||||
return 0;
|
||||
|
||||
/* move to gtt */
|
||||
ret = amdgpu_bo_reserve(bo, false);
|
||||
if (unlikely(ret != 0))
|
||||
return ret;
|
||||
|
||||
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
}
|
||||
|
||||
amdgpu_bo_unreserve(bo);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const struct dma_buf_ops amdgpu_dmabuf_ops = {
|
||||
.attach = amdgpu_gem_map_attach,
|
||||
.detach = amdgpu_gem_map_detach,
|
||||
.map_dma_buf = drm_gem_map_dma_buf,
|
||||
.unmap_dma_buf = drm_gem_unmap_dma_buf,
|
||||
.release = drm_gem_dmabuf_release,
|
||||
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
|
||||
.mmap = drm_gem_dmabuf_mmap,
|
||||
.vmap = drm_gem_dmabuf_vmap,
|
||||
.vunmap = drm_gem_dmabuf_vunmap,
|
||||
};
|
||||
|
||||
/**
|
||||
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
|
||||
* @dev: DRM device
|
||||
* @gobj: GEM BO
|
||||
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
|
||||
*
|
||||
* The main work is done by the &drm_gem_prime_export helper, which in turn
|
||||
* uses &amdgpu_gem_prime_res_obj.
|
||||
*
|
||||
* Returns:
|
||||
* Shared DMA buffer representing the GEM BO from the given device.
|
||||
*/
|
||||
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
|
||||
struct drm_gem_object *gobj,
|
||||
int flags)
|
||||
{
|
||||
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
|
||||
struct dma_buf *buf;
|
||||
|
||||
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
|
||||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
buf = drm_gem_prime_export(dev, gobj, flags);
|
||||
if (!IS_ERR(buf)) {
|
||||
buf->file->f_mapping = dev->anon_inode->i_mapping;
|
||||
buf->ops = &amdgpu_dmabuf_ops;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
|
||||
* @dev: DRM device
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright 2019 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#ifndef __AMDGPU_DMA_BUF_H__
|
||||
#define __AMDGPU_DMA_BUF_H__
|
||||
|
||||
#include <drm/drm_gem.h>
|
||||
|
||||
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
|
||||
struct drm_gem_object *
|
||||
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
|
||||
struct dma_buf_attachment *attach,
|
||||
struct sg_table *sg);
|
||||
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
|
||||
struct drm_gem_object *gobj,
|
||||
int flags);
|
||||
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
|
||||
struct dma_buf *dma_buf);
|
||||
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
|
||||
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
|
||||
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
|
||||
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
|
||||
struct vm_area_struct *vma);
|
||||
|
||||
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
|
||||
|
||||
#endif
|
|
@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal {
|
|||
int min_temp;
|
||||
/* high temperature threshold */
|
||||
int max_temp;
|
||||
/* edge max emergency(shutdown) temp */
|
||||
int max_edge_emergency_temp;
|
||||
/* hotspot low temperature threshold */
|
||||
int min_hotspot_temp;
|
||||
/* hotspot high temperature critical threshold */
|
||||
int max_hotspot_crit_temp;
|
||||
/* hotspot max emergency(shutdown) temp */
|
||||
int max_hotspot_emergency_temp;
|
||||
/* memory low temperature threshold */
|
||||
int min_mem_temp;
|
||||
/* memory high temperature critical threshold */
|
||||
int max_mem_crit_temp;
|
||||
/* memory max emergency(shutdown) temp */
|
||||
int max_mem_emergency_temp;
|
||||
/* was last interrupt low to high or high to low */
|
||||
bool high_to_low;
|
||||
/* interrupt source */
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_irq.h"
|
||||
#include "amdgpu_gem.h"
|
||||
#include "amdgpu_dma_buf.h"
|
||||
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
|
@ -81,6 +81,8 @@
|
|||
#define KMS_DRIVER_MINOR 32
|
||||
#define KMS_DRIVER_PATCHLEVEL 0
|
||||
|
||||
#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
|
||||
|
||||
int amdgpu_vram_limit = 0;
|
||||
int amdgpu_vis_vram_limit = 0;
|
||||
int amdgpu_gart_size = -1; /* auto */
|
||||
|
@ -93,7 +95,7 @@ int amdgpu_disp_priority = 0;
|
|||
int amdgpu_hw_i2c = 0;
|
||||
int amdgpu_pcie_gen2 = -1;
|
||||
int amdgpu_msi = -1;
|
||||
int amdgpu_lockup_timeout = 10000;
|
||||
char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
|
||||
int amdgpu_dpm = -1;
|
||||
int amdgpu_fw_load_type = -1;
|
||||
int amdgpu_aspm = -1;
|
||||
|
@ -227,12 +229,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
|
|||
module_param_named(msi, amdgpu_msi, int, 0444);
|
||||
|
||||
/**
|
||||
* DOC: lockup_timeout (int)
|
||||
* Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000.
|
||||
* Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
|
||||
* DOC: lockup_timeout (string)
|
||||
* Set GPU scheduler timeout value in ms.
|
||||
*
|
||||
* The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
|
||||
* multiple values specified. 0 and negative values are invalidated. They will be adjusted
|
||||
* to default timeout.
|
||||
* - With one value specified, the setting will apply to all non-compute jobs.
|
||||
* - With multiple values specified, the first one will be for GFX. The second one is for Compute.
|
||||
* And the third and fourth ones are for SDMA and Video.
|
||||
* By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
|
||||
* jobs is 10000. And there is no timeout enforced on compute jobs.
|
||||
*/
|
||||
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
|
||||
module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
|
||||
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), "
|
||||
"format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
|
||||
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
|
||||
|
||||
/**
|
||||
* DOC: dpm (int)
|
||||
|
@ -655,6 +666,16 @@ MODULE_PARM_DESC(noretry,
|
|||
int halt_if_hws_hang;
|
||||
module_param(halt_if_hws_hang, int, 0644);
|
||||
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
|
||||
|
||||
/**
|
||||
* DOC: hws_gws_support(bool)
|
||||
* Whether HWS supports GWS barriers. Default value: false (not supported).
|
||||
* This will be replaced with a MEC firmware version check once firmware
|
||||
* is ready
|
||||
*/
|
||||
bool hws_gws_support;
|
||||
module_param(hws_gws_support, bool, 0444);
|
||||
MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
@ -1216,6 +1237,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
|
||||
{
|
||||
char *input = amdgpu_lockup_timeout;
|
||||
char *timeout_setting = NULL;
|
||||
int index = 0;
|
||||
long timeout;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* By default timeout for non compute jobs is 10000.
|
||||
* And there is no timeout enforced on compute jobs.
|
||||
*/
|
||||
adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000;
|
||||
adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
|
||||
|
||||
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
|
||||
while ((timeout_setting = strsep(&input, ",")) &&
|
||||
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
|
||||
ret = kstrtol(timeout_setting, 0, &timeout);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Invalidate 0 and negative values */
|
||||
if (timeout <= 0) {
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (index++) {
|
||||
case 0:
|
||||
adev->gfx_timeout = timeout;
|
||||
break;
|
||||
case 1:
|
||||
adev->compute_timeout = timeout;
|
||||
break;
|
||||
case 2:
|
||||
adev->sdma_timeout = timeout;
|
||||
break;
|
||||
case 3:
|
||||
adev->video_timeout = timeout;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* There is only one value specified and
|
||||
* it should apply to all non-compute jobs.
|
||||
*/
|
||||
if (index == 1)
|
||||
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool
|
||||
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
|
||||
bool in_vblank_irq, int *vpos, int *hpos,
|
||||
|
@ -1230,7 +1307,8 @@ static struct drm_driver kms_driver = {
|
|||
.driver_features =
|
||||
DRIVER_USE_AGP | DRIVER_ATOMIC |
|
||||
DRIVER_GEM |
|
||||
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
|
||||
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
|
||||
DRIVER_SYNCOBJ_TIMELINE,
|
||||
.load = amdgpu_driver_load_kms,
|
||||
.open = amdgpu_driver_open_kms,
|
||||
.postclose = amdgpu_driver_postclose_kms,
|
||||
|
|
|
@ -427,9 +427,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
|
|||
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
|
||||
unsigned num_hw_submission)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
long timeout;
|
||||
int r;
|
||||
|
||||
if (!adev)
|
||||
return -EINVAL;
|
||||
|
||||
/* Check that num_hw_submission is a power of two */
|
||||
if ((num_hw_submission & (num_hw_submission - 1)) != 0)
|
||||
return -EINVAL;
|
||||
|
@ -451,12 +455,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
|
|||
|
||||
/* No need to setup the GPU scheduler for KIQ ring */
|
||||
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
|
||||
/* for non-sriov case, no timeout enforce on compute ring */
|
||||
if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
|
||||
&& !amdgpu_sriov_vf(ring->adev))
|
||||
timeout = MAX_SCHEDULE_TIMEOUT;
|
||||
else
|
||||
timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
|
||||
switch (ring->funcs->type) {
|
||||
case AMDGPU_RING_TYPE_GFX:
|
||||
timeout = adev->gfx_timeout;
|
||||
break;
|
||||
case AMDGPU_RING_TYPE_COMPUTE:
|
||||
/*
|
||||
* For non-sriov case, no timeout enforce
|
||||
* on compute ring by default. Unless user
|
||||
* specifies a timeout for compute ring.
|
||||
*
|
||||
* For sriov case, always use the timeout
|
||||
* as gfx ring
|
||||
*/
|
||||
if (!amdgpu_sriov_vf(ring->adev))
|
||||
timeout = adev->compute_timeout;
|
||||
else
|
||||
timeout = adev->gfx_timeout;
|
||||
break;
|
||||
case AMDGPU_RING_TYPE_SDMA:
|
||||
timeout = adev->sdma_timeout;
|
||||
break;
|
||||
default:
|
||||
timeout = adev->video_timeout;
|
||||
break;
|
||||
}
|
||||
|
||||
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
|
||||
num_hw_submission, amdgpu_job_hang_limit,
|
||||
|
|
|
@ -27,26 +27,11 @@
|
|||
struct amdgpu_ring;
|
||||
struct amdgpu_bo;
|
||||
|
||||
struct amdgpu_gds_asic_info {
|
||||
uint32_t total_size;
|
||||
uint32_t gfx_partition_size;
|
||||
uint32_t cs_partition_size;
|
||||
};
|
||||
|
||||
struct amdgpu_gds {
|
||||
struct amdgpu_gds_asic_info mem;
|
||||
struct amdgpu_gds_asic_info gws;
|
||||
struct amdgpu_gds_asic_info oa;
|
||||
uint32_t gds_size;
|
||||
uint32_t gws_size;
|
||||
uint32_t oa_size;
|
||||
uint32_t gds_compute_max_wave_id;
|
||||
|
||||
/* At present, GDS, GWS and OA resources for gfx (graphics)
|
||||
* is always pre-allocated and available for graphics operation.
|
||||
* Such resource is shared between all gfx clients.
|
||||
* TODO: move this operation to user space
|
||||
* */
|
||||
struct amdgpu_bo* gds_gfx_bo;
|
||||
struct amdgpu_bo* gws_gfx_bo;
|
||||
struct amdgpu_bo* oa_gfx_bo;
|
||||
};
|
||||
|
||||
struct amdgpu_gds_reg_offset {
|
||||
|
|
|
@ -330,26 +330,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
|
|||
|
||||
r = amdgpu_bo_reserve(bo, true);
|
||||
if (r)
|
||||
goto free_pages;
|
||||
goto user_pages_done;
|
||||
|
||||
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
|
||||
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
amdgpu_bo_unreserve(bo);
|
||||
if (r)
|
||||
goto free_pages;
|
||||
goto user_pages_done;
|
||||
}
|
||||
|
||||
r = drm_gem_handle_create(filp, gobj, &handle);
|
||||
/* drop reference from allocate - handle holds it now */
|
||||
drm_gem_object_put_unlocked(gobj);
|
||||
if (r)
|
||||
return r;
|
||||
goto user_pages_done;
|
||||
|
||||
args->handle = handle;
|
||||
return 0;
|
||||
|
||||
free_pages:
|
||||
release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
|
||||
user_pages_done:
|
||||
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
|
||||
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
|
||||
|
||||
release_object:
|
||||
drm_gem_object_put_unlocked(gobj);
|
||||
|
|
|
@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
|
|||
void amdgpu_gem_object_close(struct drm_gem_object *obj,
|
||||
struct drm_file *file_priv);
|
||||
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
|
||||
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
|
||||
struct drm_gem_object *
|
||||
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
|
||||
struct dma_buf_attachment *attach,
|
||||
struct sg_table *sg);
|
||||
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
|
||||
struct drm_gem_object *gobj,
|
||||
int flags);
|
||||
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
|
||||
struct dma_buf *dma_buf);
|
||||
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
|
||||
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
|
||||
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
|
||||
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
|
||||
|
||||
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
|
||||
|
||||
/*
|
||||
* GEM objects.
|
||||
|
|
|
@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
|
|||
|
||||
if (amdgpu_device_should_recover_gpu(ring->adev))
|
||||
amdgpu_device_gpu_recover(ring->adev, job);
|
||||
else
|
||||
drm_sched_suspend_timeout(&ring->sched);
|
||||
}
|
||||
|
||||
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
|
||||
|
|
|
@ -590,13 +590,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
|
|||
struct drm_amdgpu_info_gds gds_info;
|
||||
|
||||
memset(&gds_info, 0, sizeof(gds_info));
|
||||
gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
|
||||
gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
|
||||
gds_info.gds_total_size = adev->gds.mem.total_size;
|
||||
gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
|
||||
gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
|
||||
gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
|
||||
gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
|
||||
gds_info.compute_partition_size = adev->gds.gds_size;
|
||||
gds_info.gds_total_size = adev->gds.gds_size;
|
||||
gds_info.gws_per_compute_partition = adev->gds.gws_size;
|
||||
gds_info.oa_per_compute_partition = adev->gds.oa_size;
|
||||
return copy_to_user(out, &gds_info,
|
||||
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/hmm.h>
|
||||
#include <linux/interval_tree.h>
|
||||
#include <drm/drmP.h>
|
||||
#include <drm/drm.h>
|
||||
|
@ -58,14 +58,12 @@
|
|||
*
|
||||
* @adev: amdgpu device pointer
|
||||
* @mm: process address space
|
||||
* @mn: MMU notifier structure
|
||||
* @type: type of MMU notifier
|
||||
* @work: destruction work item
|
||||
* @node: hash table node to find structure by adev and mn
|
||||
* @lock: rw semaphore protecting the notifier nodes
|
||||
* @objects: interval tree containing amdgpu_mn_nodes
|
||||
* @read_lock: mutex for recursive locking of @lock
|
||||
* @recursion: depth of recursion
|
||||
* @mirror: HMM mirror function support
|
||||
*
|
||||
* Data for each amdgpu device and process address space.
|
||||
*/
|
||||
|
@ -73,7 +71,6 @@ struct amdgpu_mn {
|
|||
/* constant after initialisation */
|
||||
struct amdgpu_device *adev;
|
||||
struct mm_struct *mm;
|
||||
struct mmu_notifier mn;
|
||||
enum amdgpu_mn_type type;
|
||||
|
||||
/* only used on destruction */
|
||||
|
@ -85,8 +82,9 @@ struct amdgpu_mn {
|
|||
/* objects protected by lock */
|
||||
struct rw_semaphore lock;
|
||||
struct rb_root_cached objects;
|
||||
struct mutex read_lock;
|
||||
atomic_t recursion;
|
||||
|
||||
/* HMM mirror */
|
||||
struct hmm_mirror mirror;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -103,7 +101,7 @@ struct amdgpu_mn_node {
|
|||
};
|
||||
|
||||
/**
|
||||
* amdgpu_mn_destroy - destroy the MMU notifier
|
||||
* amdgpu_mn_destroy - destroy the HMM mirror
|
||||
*
|
||||
* @work: previously scheduled work item
|
||||
*
|
||||
|
@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
|
|||
}
|
||||
up_write(&amn->lock);
|
||||
mutex_unlock(&adev->mn_lock);
|
||||
mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
|
||||
|
||||
hmm_mirror_unregister(&amn->mirror);
|
||||
kfree(amn);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_mn_release - callback to notify about mm destruction
|
||||
* amdgpu_hmm_mirror_release - callback to notify about mm destruction
|
||||
*
|
||||
* @mn: our notifier
|
||||
* @mm: the mm this callback is about
|
||||
* @mirror: the HMM mirror (mm) this callback is about
|
||||
*
|
||||
* Shedule a work item to lazy destroy our notifier.
|
||||
* Schedule a work item to lazily destroy the HMM mirror.
|
||||
*/
|
||||
static void amdgpu_mn_release(struct mmu_notifier *mn,
|
||||
struct mm_struct *mm)
|
||||
static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
|
||||
{
|
||||
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
|
||||
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
|
||||
|
||||
INIT_WORK(&amn->work, amdgpu_mn_destroy);
|
||||
schedule_work(&amn->work);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* amdgpu_mn_lock - take the write side lock for this notifier
|
||||
*
|
||||
|
@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
|
|||
static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
|
||||
{
|
||||
if (blockable)
|
||||
mutex_lock(&amn->read_lock);
|
||||
else if (!mutex_trylock(&amn->read_lock))
|
||||
down_read(&amn->lock);
|
||||
else if (!down_read_trylock(&amn->lock))
|
||||
return -EAGAIN;
|
||||
|
||||
if (atomic_inc_return(&amn->recursion) == 1)
|
||||
down_read_non_owner(&amn->lock);
|
||||
mutex_unlock(&amn->read_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
|
|||
*/
|
||||
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
|
||||
{
|
||||
if (atomic_dec_return(&amn->recursion) == 0)
|
||||
up_read_non_owner(&amn->lock);
|
||||
up_read(&amn->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -229,149 +220,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
|
|||
true, false, MAX_SCHEDULE_TIMEOUT);
|
||||
if (r <= 0)
|
||||
DRM_ERROR("(%ld) failed to wait for user bo\n", r);
|
||||
|
||||
amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
|
||||
* amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
|
||||
*
|
||||
* @mn: our notifier
|
||||
* @range: mmu notifier context
|
||||
* @mirror: the hmm_mirror (mm) is about to update
|
||||
* @update: the update start, end address
|
||||
*
|
||||
* Block for operations on BOs to finish and mark pages as accessed and
|
||||
* potentially dirty.
|
||||
*/
|
||||
static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
|
||||
const struct mmu_notifier_range *range)
|
||||
static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
|
||||
const struct hmm_update *update)
|
||||
{
|
||||
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
|
||||
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
|
||||
unsigned long start = update->start;
|
||||
unsigned long end = update->end;
|
||||
bool blockable = update->blockable;
|
||||
struct interval_tree_node *it;
|
||||
unsigned long end;
|
||||
|
||||
/* notification is exclusive, but interval is inclusive */
|
||||
end = range->end - 1;
|
||||
end -= 1;
|
||||
|
||||
/* TODO we should be able to split locking for interval tree and
|
||||
* amdgpu_mn_invalidate_node
|
||||
*/
|
||||
if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
|
||||
if (amdgpu_mn_read_lock(amn, blockable))
|
||||
return -EAGAIN;
|
||||
|
||||
it = interval_tree_iter_first(&amn->objects, range->start, end);
|
||||
it = interval_tree_iter_first(&amn->objects, start, end);
|
||||
while (it) {
|
||||
struct amdgpu_mn_node *node;
|
||||
|
||||
if (!mmu_notifier_range_blockable(range)) {
|
||||
if (!blockable) {
|
||||
amdgpu_mn_read_unlock(amn);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
node = container_of(it, struct amdgpu_mn_node, it);
|
||||
it = interval_tree_iter_next(it, range->start, end);
|
||||
it = interval_tree_iter_next(it, start, end);
|
||||
|
||||
amdgpu_mn_invalidate_node(node, range->start, end);
|
||||
amdgpu_mn_invalidate_node(node, start, end);
|
||||
}
|
||||
|
||||
amdgpu_mn_read_unlock(amn);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
|
||||
* amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
|
||||
*
|
||||
* @mn: our notifier
|
||||
* @mm: the mm this callback is about
|
||||
* @start: start of updated range
|
||||
* @end: end of updated range
|
||||
* @mirror: the hmm_mirror (mm) is about to update
|
||||
* @update: the update start, end address
|
||||
*
|
||||
* We temporarily evict all BOs between start and end. This
|
||||
* necessitates evicting all user-mode queues of the process. The BOs
|
||||
* are restored in amdgpu_mn_invalidate_range_end_hsa.
|
||||
*/
|
||||
static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
|
||||
const struct mmu_notifier_range *range)
|
||||
static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
|
||||
const struct hmm_update *update)
|
||||
{
|
||||
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
|
||||
struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
|
||||
unsigned long start = update->start;
|
||||
unsigned long end = update->end;
|
||||
bool blockable = update->blockable;
|
||||
struct interval_tree_node *it;
|
||||
unsigned long end;
|
||||
|
||||
/* notification is exclusive, but interval is inclusive */
|
||||
end = range->end - 1;
|
||||
end -= 1;
|
||||
|
||||
if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
|
||||
if (amdgpu_mn_read_lock(amn, blockable))
|
||||
return -EAGAIN;
|
||||
|
||||
it = interval_tree_iter_first(&amn->objects, range->start, end);
|
||||
it = interval_tree_iter_first(&amn->objects, start, end);
|
||||
while (it) {
|
||||
struct amdgpu_mn_node *node;
|
||||
struct amdgpu_bo *bo;
|
||||
|
||||
if (!mmu_notifier_range_blockable(range)) {
|
||||
if (!blockable) {
|
||||
amdgpu_mn_read_unlock(amn);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
node = container_of(it, struct amdgpu_mn_node, it);
|
||||
it = interval_tree_iter_next(it, range->start, end);
|
||||
it = interval_tree_iter_next(it, start, end);
|
||||
|
||||
list_for_each_entry(bo, &node->bos, mn_list) {
|
||||
struct kgd_mem *mem = bo->kfd_bo;
|
||||
|
||||
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
|
||||
range->start,
|
||||
end))
|
||||
amdgpu_amdkfd_evict_userptr(mem, range->mm);
|
||||
start, end))
|
||||
amdgpu_amdkfd_evict_userptr(mem, amn->mm);
|
||||
}
|
||||
}
|
||||
|
||||
amdgpu_mn_read_unlock(amn);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
|
||||
*
|
||||
* @mn: our notifier
|
||||
* @mm: the mm this callback is about
|
||||
* @start: start of updated range
|
||||
* @end: end of updated range
|
||||
*
|
||||
* Release the lock again to allow new command submissions.
|
||||
*/
|
||||
static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
|
||||
const struct mmu_notifier_range *range)
|
||||
{
|
||||
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
|
||||
|
||||
amdgpu_mn_read_unlock(amn);
|
||||
}
|
||||
|
||||
static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
|
||||
[AMDGPU_MN_TYPE_GFX] = {
|
||||
.release = amdgpu_mn_release,
|
||||
.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
|
||||
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
|
||||
},
|
||||
[AMDGPU_MN_TYPE_HSA] = {
|
||||
.release = amdgpu_mn_release,
|
||||
.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
|
||||
.invalidate_range_end = amdgpu_mn_invalidate_range_end,
|
||||
},
|
||||
};
|
||||
|
||||
/* Low bits of any reasonable mm pointer will be unused due to struct
|
||||
* alignment. Use these bits to make a unique key from the mm pointer
|
||||
* and notifier type.
|
||||
*/
|
||||
#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
|
||||
|
||||
static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
|
||||
[AMDGPU_MN_TYPE_GFX] = {
|
||||
.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
|
||||
.release = amdgpu_hmm_mirror_release
|
||||
},
|
||||
[AMDGPU_MN_TYPE_HSA] = {
|
||||
.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
|
||||
.release = amdgpu_hmm_mirror_release
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* amdgpu_mn_get - create notifier context
|
||||
* amdgpu_mn_get - create HMM mirror context
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
* @type: type of MMU notifier context
|
||||
*
|
||||
* Creates a notifier context for current->mm.
|
||||
* Creates a HMM mirror context for current->mm.
|
||||
*/
|
||||
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
||||
enum amdgpu_mn_type type)
|
||||
|
@ -401,12 +375,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
|||
amn->mm = mm;
|
||||
init_rwsem(&amn->lock);
|
||||
amn->type = type;
|
||||
amn->mn.ops = &amdgpu_mn_ops[type];
|
||||
amn->objects = RB_ROOT_CACHED;
|
||||
mutex_init(&amn->read_lock);
|
||||
atomic_set(&amn->recursion, 0);
|
||||
|
||||
r = __mmu_notifier_register(&amn->mn, mm);
|
||||
amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
|
||||
r = hmm_mirror_register(&amn->mirror, mm);
|
||||
if (r)
|
||||
goto free_amn;
|
||||
|
||||
|
@ -432,7 +404,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
|||
* @bo: amdgpu buffer object
|
||||
* @addr: userptr addr we should monitor
|
||||
*
|
||||
* Registers an MMU notifier for the given BO at the specified address.
|
||||
* Registers an HMM mirror for the given BO at the specified address.
|
||||
* Returns 0 on success, -ERRNO if anything goes wrong.
|
||||
*/
|
||||
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
||||
|
@ -488,11 +460,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
|||
}
|
||||
|
||||
/**
|
||||
* amdgpu_mn_unregister - unregister a BO for notifier updates
|
||||
* amdgpu_mn_unregister - unregister a BO for HMM mirror updates
|
||||
*
|
||||
* @bo: amdgpu buffer object
|
||||
*
|
||||
* Remove any registration of MMU notifier updates from the buffer object.
|
||||
* Remove any registration of HMM mirror updates from the buffer object.
|
||||
*/
|
||||
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
|
||||
{
|
||||
|
@ -528,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
|
|||
mutex_unlock(&adev->mn_lock);
|
||||
}
|
||||
|
||||
/* flags used by HMM internal, not related to CPU/GPU PTE flags */
|
||||
static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
|
||||
(1 << 0), /* HMM_PFN_VALID */
|
||||
(1 << 1), /* HMM_PFN_WRITE */
|
||||
0 /* HMM_PFN_DEVICE_PRIVATE */
|
||||
};
|
||||
|
||||
static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
|
||||
0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
|
||||
0, /* HMM_PFN_NONE */
|
||||
0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
|
||||
};
|
||||
|
||||
void amdgpu_hmm_init_range(struct hmm_range *range)
|
||||
{
|
||||
if (range) {
|
||||
range->flags = hmm_range_flags;
|
||||
range->values = hmm_range_values;
|
||||
range->pfn_shift = PAGE_SHIFT;
|
||||
range->pfns = NULL;
|
||||
INIT_LIST_HEAD(&range->list);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,22 +25,24 @@
|
|||
#define __AMDGPU_MN_H__
|
||||
|
||||
/*
|
||||
* MMU Notifier
|
||||
* HMM mirror
|
||||
*/
|
||||
struct amdgpu_mn;
|
||||
struct hmm_range;
|
||||
|
||||
enum amdgpu_mn_type {
|
||||
AMDGPU_MN_TYPE_GFX,
|
||||
AMDGPU_MN_TYPE_HSA,
|
||||
};
|
||||
|
||||
#if defined(CONFIG_MMU_NOTIFIER)
|
||||
#if defined(CONFIG_HMM_MIRROR)
|
||||
void amdgpu_mn_lock(struct amdgpu_mn *mn);
|
||||
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
|
||||
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
||||
enum amdgpu_mn_type type);
|
||||
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
|
||||
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
|
||||
void amdgpu_hmm_init_range(struct hmm_range *range);
|
||||
#else
|
||||
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
|
||||
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
|
||||
|
@ -51,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
|
|||
}
|
||||
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
||||
{
|
||||
DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
|
||||
"add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
|
||||
|
|
|
@ -331,8 +331,6 @@ struct amdgpu_mode_info {
|
|||
struct drm_property *audio_property;
|
||||
/* FMT dithering */
|
||||
struct drm_property *dither_property;
|
||||
/* maximum number of bits per channel for monitor color */
|
||||
struct drm_property *max_bpc_property;
|
||||
/* Adaptive Backlight Modulation (power feature) */
|
||||
struct drm_property *abm_level_property;
|
||||
/* hardcoded DFP edid from BIOS */
|
||||
|
|
|
@ -67,6 +67,15 @@ static const struct cg_flag_name clocks[] = {
|
|||
{0, NULL},
|
||||
};
|
||||
|
||||
static const struct hwmon_temp_label {
|
||||
enum PP_HWMON_TEMP channel;
|
||||
const char *label;
|
||||
} temp_label[] = {
|
||||
{PP_TEMP_EDGE, "edge"},
|
||||
{PP_TEMP_JUNCTION, "junction"},
|
||||
{PP_TEMP_MEM, "mem"},
|
||||
};
|
||||
|
||||
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->pm.dpm_enabled) {
|
||||
|
@ -758,7 +767,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
|
|||
|
||||
pr_debug("featuremask = 0x%llx\n", featuremask);
|
||||
|
||||
if (adev->powerplay.pp_funcs->set_ppfeature_status) {
|
||||
if (is_support_sw_smu(adev)) {
|
||||
ret = smu_set_ppfeature_status(&adev->smu, featuremask);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
|
||||
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
@ -774,7 +787,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
|
|||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
|
||||
if (adev->powerplay.pp_funcs->get_ppfeature_status)
|
||||
if (is_support_sw_smu(adev)) {
|
||||
return smu_get_ppfeature_status(&adev->smu, buf);
|
||||
} else if (adev->powerplay.pp_funcs->get_ppfeature_status)
|
||||
return amdgpu_dpm_get_ppfeature_status(adev, buf);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "\n");
|
||||
|
@ -1302,6 +1317,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
|
|||
return snprintf(buf, PAGE_SIZE, "%d\n", value);
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: mem_busy_percent
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for reading how busy the VRAM
|
||||
* is as a percentage. The file mem_busy_percent is used for this.
|
||||
* The SMU firmware computes a percentage of load based on the
|
||||
* aggregate activity level in the IP cores.
|
||||
*/
|
||||
static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
int r, value, size = sizeof(value);
|
||||
|
||||
/* read the IP busy sensor */
|
||||
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
|
||||
(void *)&value, &size);
|
||||
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", value);
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: pcie_bw
|
||||
*
|
||||
|
@ -1327,6 +1368,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
|
|||
count0, count1, pcie_get_mps(adev->pdev));
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: unique_id
|
||||
*
|
||||
* The amdgpu driver provides a sysfs API for providing a unique ID for the GPU
|
||||
* The file unique_id is used for this.
|
||||
* This will provide a Unique ID that will persist from machine to machine
|
||||
*
|
||||
* NOTE: This will only work for GFX9 and newer. This file will be absent
|
||||
* on unsupported ASICs (GFX8 and older)
|
||||
*/
|
||||
static ssize_t amdgpu_get_unique_id(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct drm_device *ddev = dev_get_drvdata(dev);
|
||||
struct amdgpu_device *adev = ddev->dev_private;
|
||||
|
||||
if (adev->unique_id)
|
||||
return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
|
||||
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_dpm_forced_performance_level,
|
||||
|
@ -1371,10 +1435,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
|
|||
amdgpu_set_pp_od_clk_voltage);
|
||||
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
|
||||
amdgpu_get_busy_percent, NULL);
|
||||
static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
|
||||
amdgpu_get_memory_busy_percent, NULL);
|
||||
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
|
||||
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
|
||||
amdgpu_get_ppfeature_status,
|
||||
amdgpu_set_ppfeature_status);
|
||||
static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
|
@ -1382,6 +1449,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
|||
{
|
||||
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||
struct drm_device *ddev = adev->ddev;
|
||||
int channel = to_sensor_dev_attr(attr)->index;
|
||||
int r, temp, size = sizeof(temp);
|
||||
|
||||
/* Can't get temperature when the card is off */
|
||||
|
@ -1389,11 +1457,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
|
|||
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
|
||||
return -EINVAL;
|
||||
|
||||
/* get the temperature */
|
||||
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
|
||||
(void *)&temp, &size);
|
||||
if (r)
|
||||
return r;
|
||||
if (channel >= PP_TEMP_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
switch (channel) {
|
||||
case PP_TEMP_JUNCTION:
|
||||
/* get current junction temperature */
|
||||
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
|
||||
(void *)&temp, &size);
|
||||
if (r)
|
||||
return r;
|
||||
break;
|
||||
case PP_TEMP_EDGE:
|
||||
/* get current edge temperature */
|
||||
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
|
||||
(void *)&temp, &size);
|
||||
if (r)
|
||||
return r;
|
||||
break;
|
||||
case PP_TEMP_MEM:
|
||||
/* get current memory temperature */
|
||||
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
|
||||
(void *)&temp, &size);
|
||||
if (r)
|
||||
return r;
|
||||
break;
|
||||
}
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||
}
|
||||
|
@ -1414,6 +1503,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
|
|||
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||
int hyst = to_sensor_dev_attr(attr)->index;
|
||||
int temp;
|
||||
|
||||
if (hyst)
|
||||
temp = adev->pm.dpm.thermal.min_hotspot_temp;
|
||||
else
|
||||
temp = adev->pm.dpm.thermal.max_hotspot_crit_temp;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||
int hyst = to_sensor_dev_attr(attr)->index;
|
||||
int temp;
|
||||
|
||||
if (hyst)
|
||||
temp = adev->pm.dpm.thermal.min_mem_temp;
|
||||
else
|
||||
temp = adev->pm.dpm.thermal.max_mem_crit_temp;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
int channel = to_sensor_dev_attr(attr)->index;
|
||||
|
||||
if (channel >= PP_TEMP_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct amdgpu_device *adev = dev_get_drvdata(dev);
|
||||
int channel = to_sensor_dev_attr(attr)->index;
|
||||
int temp = 0;
|
||||
|
||||
if (channel >= PP_TEMP_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
switch (channel) {
|
||||
case PP_TEMP_JUNCTION:
|
||||
temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
|
||||
break;
|
||||
case PP_TEMP_EDGE:
|
||||
temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
|
||||
break;
|
||||
case PP_TEMP_MEM:
|
||||
temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
|
||||
break;
|
||||
}
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
|
||||
}
|
||||
|
||||
static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
|
@ -1983,11 +2142,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
|
|||
*
|
||||
* hwmon interfaces for GPU temperature:
|
||||
*
|
||||
* - temp1_input: the on die GPU temperature in millidegrees Celsius
|
||||
* - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
|
||||
* - temp2_input and temp3_input are supported on SOC15 dGPUs only
|
||||
*
|
||||
* - temp1_crit: temperature critical max value in millidegrees Celsius
|
||||
* - temp[1-3]_label: temperature channel label
|
||||
* - temp2_label and temp3_label are supported on SOC15 dGPUs only
|
||||
*
|
||||
* - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
|
||||
* - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
|
||||
* - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
|
||||
*
|
||||
* - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
|
||||
* - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
|
||||
*
|
||||
* - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
|
||||
* - these are supported on SOC15 dGPUs only
|
||||
*
|
||||
* hwmon interfaces for GPU voltage:
|
||||
*
|
||||
|
@ -2035,9 +2203,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
|
|||
*
|
||||
*/
|
||||
|
||||
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
|
||||
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
|
||||
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
|
||||
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
|
||||
static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
|
||||
static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
|
||||
static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
|
||||
static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
|
||||
static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
|
||||
static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
|
||||
static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
|
||||
static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
|
||||
static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
|
||||
static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
|
||||
static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
|
||||
static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
|
||||
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
|
||||
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
|
||||
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
|
||||
|
@ -2064,6 +2244,18 @@ static struct attribute *hwmon_attributes[] = {
|
|||
&sensor_dev_attr_temp1_input.dev_attr.attr,
|
||||
&sensor_dev_attr_temp1_crit.dev_attr.attr,
|
||||
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
|
||||
&sensor_dev_attr_temp2_input.dev_attr.attr,
|
||||
&sensor_dev_attr_temp2_crit.dev_attr.attr,
|
||||
&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
|
||||
&sensor_dev_attr_temp3_input.dev_attr.attr,
|
||||
&sensor_dev_attr_temp3_crit.dev_attr.attr,
|
||||
&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
|
||||
&sensor_dev_attr_temp1_emergency.dev_attr.attr,
|
||||
&sensor_dev_attr_temp2_emergency.dev_attr.attr,
|
||||
&sensor_dev_attr_temp3_emergency.dev_attr.attr,
|
||||
&sensor_dev_attr_temp1_label.dev_attr.attr,
|
||||
&sensor_dev_attr_temp2_label.dev_attr.attr,
|
||||
&sensor_dev_attr_temp3_label.dev_attr.attr,
|
||||
&sensor_dev_attr_pwm1.dev_attr.attr,
|
||||
&sensor_dev_attr_pwm1_enable.dev_attr.attr,
|
||||
&sensor_dev_attr_pwm1_min.dev_attr.attr,
|
||||
|
@ -2186,6 +2378,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
|
|||
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
|
||||
return 0;
|
||||
|
||||
/* only SOC15 dGPUs support hotspot and mem temperatures */
|
||||
if (((adev->flags & AMD_IS_APU) ||
|
||||
adev->asic_type < CHIP_VEGA10) &&
|
||||
(attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
|
||||
return 0;
|
||||
|
||||
return effective_mode;
|
||||
}
|
||||
|
||||
|
@ -2612,6 +2820,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
|||
"gpu_busy_level\n");
|
||||
return ret;
|
||||
}
|
||||
/* APU does not have its own dedicated memory */
|
||||
if (!(adev->flags & AMD_IS_APU)) {
|
||||
ret = device_create_file(adev->dev,
|
||||
&dev_attr_mem_busy_percent);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file "
|
||||
"mem_busy_percent\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
/* PCIe Perf counters won't work on APU nodes */
|
||||
if (!(adev->flags & AMD_IS_APU)) {
|
||||
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
|
||||
|
@ -2620,6 +2838,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
|
|||
return ret;
|
||||
}
|
||||
}
|
||||
if (adev->unique_id)
|
||||
ret = device_create_file(adev->dev, &dev_attr_unique_id);
|
||||
if (ret) {
|
||||
DRM_ERROR("failed to create device file unique_id\n");
|
||||
return ret;
|
||||
}
|
||||
ret = amdgpu_debugfs_pm_init(adev);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to register debugfs file for dpm!\n");
|
||||
|
@ -2677,8 +2901,12 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
|
|||
device_remove_file(adev->dev,
|
||||
&dev_attr_pp_od_clk_voltage);
|
||||
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
|
||||
if (!(adev->flags & AMD_IS_APU))
|
||||
device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
|
||||
if (!(adev->flags & AMD_IS_APU))
|
||||
device_remove_file(adev->dev, &dev_attr_pcie_bw);
|
||||
if (adev->unique_id)
|
||||
device_remove_file(adev->dev, &dev_attr_unique_id);
|
||||
if ((adev->asic_type >= CHIP_VEGA10) &&
|
||||
!(adev->flags & AMD_IS_APU))
|
||||
device_remove_file(adev->dev, &dev_attr_ppfeatures);
|
||||
|
@ -2775,6 +3003,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
|
|||
/* GPU Load */
|
||||
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
|
||||
seq_printf(m, "GPU Load: %u %%\n", value);
|
||||
/* MEM Load */
|
||||
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size))
|
||||
seq_printf(m, "MEM Load: %u %%\n", value);
|
||||
|
||||
seq_printf(m, "\n");
|
||||
|
||||
/* SMC feature mask */
|
||||
|
|
|
@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Fill @cmd as a GFX_CMD_ID_PROG_REG command carrying register id @id and
 * the value @value to program.
 */
static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
				      uint32_t id, uint32_t value)
{
	cmd->cmd.cmd_setup_reg_prog.reg_id = id;
	cmd->cmd.cmd_setup_reg_prog.reg_value = value;
	cmd->cmd_id = GFX_CMD_ID_PROG_REG;
}
|
||||
|
||||
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
|
||||
uint32_t value)
|
||||
{
|
||||
struct psp_gfx_cmd_resp *cmd = NULL;
|
||||
int ret = 0;
|
||||
|
||||
if (reg >= PSP_REG_LAST)
|
||||
return -EINVAL;
|
||||
|
||||
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
psp_prep_reg_prog_cmd_buf(cmd, reg, value);
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
|
||||
|
||||
kfree(cmd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
|
||||
uint32_t xgmi_ta_size, uint32_t shared_size)
|
||||
|
|
|
@ -62,6 +62,14 @@ struct psp_ring
|
|||
uint32_t ring_size;
|
||||
};
|
||||
|
||||
/* More registers may be supported in the future */
|
||||
enum psp_reg_prog_id {
|
||||
PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
|
||||
PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
|
||||
PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
|
||||
PSP_REG_LAST
|
||||
};
|
||||
|
||||
struct psp_funcs
|
||||
{
|
||||
int (*init_microcode)(struct psp_context *psp);
|
||||
|
@ -95,12 +103,26 @@ struct psp_funcs
|
|||
int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
|
||||
};
|
||||
|
||||
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
|
||||
struct psp_xgmi_node_info {
|
||||
uint64_t node_id;
|
||||
uint8_t num_hops;
|
||||
uint8_t is_sharing_enabled;
|
||||
enum ta_xgmi_assigned_sdma_engine sdma_engine;
|
||||
};
|
||||
|
||||
struct psp_xgmi_topology_info {
|
||||
uint32_t num_nodes;
|
||||
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
|
||||
};
|
||||
|
||||
struct psp_xgmi_context {
|
||||
uint8_t initialized;
|
||||
uint32_t session_id;
|
||||
struct amdgpu_bo *xgmi_shared_bo;
|
||||
uint64_t xgmi_shared_mc_addr;
|
||||
void *xgmi_shared_buf;
|
||||
struct psp_xgmi_topology_info top_info;
|
||||
};
|
||||
|
||||
struct psp_ras_context {
|
||||
|
@ -181,18 +203,6 @@ struct amdgpu_psp_funcs {
|
|||
enum AMDGPU_UCODE_ID);
|
||||
};
|
||||
|
||||
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
|
||||
struct psp_xgmi_node_info {
|
||||
uint64_t node_id;
|
||||
uint8_t num_hops;
|
||||
uint8_t is_sharing_enabled;
|
||||
enum ta_xgmi_assigned_sdma_engine sdma_engine;
|
||||
};
|
||||
|
||||
struct psp_xgmi_topology_info {
|
||||
uint32_t num_nodes;
|
||||
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
|
||||
};
|
||||
|
||||
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
|
||||
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
|
||||
|
@ -250,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp,
|
|||
union ta_ras_cmd_input *info, bool enable);
|
||||
|
||||
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
|
||||
|
||||
int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
|
||||
uint32_t value);
|
||||
#endif
|
||||
|
|
|
@ -90,6 +90,12 @@ struct ras_manager {
|
|||
struct ras_err_data err_data;
|
||||
};
|
||||
|
||||
struct ras_badpage {
|
||||
unsigned int bp;
|
||||
unsigned int size;
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
const char *ras_error_string[] = {
|
||||
"none",
|
||||
"parity",
|
||||
|
@ -118,7 +124,8 @@ const char *ras_block_string[] = {
|
|||
#define ras_err_str(i) (ras_error_string[ffs(i)])
|
||||
#define ras_block_str(i) (ras_block_string[i])
|
||||
|
||||
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
|
||||
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
|
||||
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
|
||||
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
|
||||
|
||||
static void amdgpu_ras_self_test(struct amdgpu_device *adev)
|
||||
|
@ -237,8 +244,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
|
|||
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* DOC: ras debugfs control interface
|
||||
/**
|
||||
* DOC: AMDGPU RAS debugfs control interface
|
||||
*
|
||||
* It accepts struct ras_debug_if who has two members.
|
||||
*
|
||||
|
@ -521,6 +528,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
|
|||
enable ? "enable":"disable",
|
||||
ras_block_str(head->block),
|
||||
ret);
|
||||
if (ret == TA_RAS_STATUS__RESET_NEEDED)
|
||||
return -EAGAIN;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -541,16 +550,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
|
|||
return -EINVAL;
|
||||
|
||||
if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
|
||||
/* If ras is enabled by vbios, we set up ras object first in
|
||||
* both case. For enable, that is all what we need do. For
|
||||
* disable, we need perform a ras TA disable cmd after that.
|
||||
*/
|
||||
ret = __amdgpu_ras_feature_enable(adev, head, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (enable) {
|
||||
/* There is no harm to issue a ras TA cmd regardless of
|
||||
* the current ras state.
|
||||
* If current state == target state, it will do nothing
|
||||
* But sometimes it requests driver to reset and repost
|
||||
* with error code -EAGAIN.
|
||||
*/
|
||||
ret = amdgpu_ras_feature_enable(adev, head, 1);
|
||||
/* With old ras TA, we might fail to enable ras.
|
||||
* Log it and just setup the object.
|
||||
* TODO need remove this WA in the future.
|
||||
*/
|
||||
if (ret == -EINVAL) {
|
||||
ret = __amdgpu_ras_feature_enable(adev, head, 1);
|
||||
if (!ret)
|
||||
DRM_INFO("RAS INFO: %s setup object\n",
|
||||
ras_block_str(head->block));
|
||||
}
|
||||
} else {
|
||||
/* setup the object then issue a ras TA disable cmd.*/
|
||||
ret = __amdgpu_ras_feature_enable(adev, head, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!enable)
|
||||
ret = amdgpu_ras_feature_enable(adev, head, 0);
|
||||
}
|
||||
} else
|
||||
ret = amdgpu_ras_feature_enable(adev, head, enable);
|
||||
|
||||
|
@ -691,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
|||
|
||||
/* sysfs begin */
|
||||
|
||||
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
|
||||
struct ras_badpage **bps, unsigned int *count);
|
||||
|
||||
/*
 * Map a bad page flags value to the one-letter code printed in the
 * gpu_vram_bad_pages file: 0 -> "R", 1 -> "P", anything else -> "F".
 */
static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
{
	if (flags == 0)
		return "R";
	if (flags == 1)
		return "P";

	return "F";
}
|
||||
|
||||
/*
|
||||
* DOC: ras sysfs gpu_vram_bad_pages interface
|
||||
*
|
||||
* It allows user to read the bad pages of vram on the gpu through
|
||||
* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
|
||||
*
|
||||
* It outputs multiple lines, and each line stands for one gpu page.
|
||||
*
|
||||
* The format of one line is below,
|
||||
* gpu pfn : gpu page size : flags
|
||||
*
|
||||
* gpu pfn and gpu page size are printed in hex format.
|
||||
* flags can be one of below character,
|
||||
* R: reserved, this gpu page is reserved and not able to use.
|
||||
* P: pending for reserve, this gpu page is marked as bad, will be reserved
|
||||
* in next window of page_reserve.
|
||||
* F: unable to reserve. this gpu page can't be reserved due to some reasons.
|
||||
*
|
||||
* examples:
|
||||
* 0x00000001 : 0x00001000 : R
|
||||
* 0x00000002 : 0x00001000 : P
|
||||
*/
|
||||
|
||||
/*
 * sysfs binary-attribute read handler for gpu_vram_bad_pages.
 *
 * Each record is printed as "0x%08x : 0x%08x : %1s\n", so @ppos and @count
 * are converted to record indices: the first index is @ppos rounded up to a
 * record boundary, and the loop stops before the record containing byte
 * @ppos + @count - 1. @buf is zeroed first. Returns the number of bytes
 * written, or 0 when the bad page list cannot be read.
 */
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
		struct kobject *kobj, struct bin_attribute *attr,
		char *buf, loff_t ppos, size_t count)
{
	struct amdgpu_ras *con =
		container_of(attr, struct amdgpu_ras, badpages_attr);
	struct amdgpu_device *adev = con->adev;
	const unsigned int element_size =
		sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
	unsigned int idx = div64_ul(ppos + element_size - 1, element_size);
	unsigned int last = div64_ul(ppos + count - 1, element_size);
	struct ras_badpage *bps = NULL;
	unsigned int bps_count = 0;
	ssize_t written = 0;

	memset(buf, 0, count);

	if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
		return 0;

	while (idx < last && idx < bps_count) {
		const struct ras_badpage *rec = &bps[idx];

		/* + 1 leaves room for the terminating NUL scnprintf() adds */
		written += scnprintf(&buf[written], element_size + 1,
				"0x%08x : 0x%08x : %1s\n",
				rec->bp,
				rec->size,
				amdgpu_ras_badpage_flags_str(rec->flags));
		idx++;
	}

	kfree(bps);

	return written;
}
|
||||
|
||||
static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
|
@ -731,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
|
|||
&con->features_attr.attr,
|
||||
NULL
|
||||
};
|
||||
struct bin_attribute *bin_attrs[] = {
|
||||
&con->badpages_attr,
|
||||
NULL
|
||||
};
|
||||
struct attribute_group group = {
|
||||
.name = "ras",
|
||||
.attrs = attrs,
|
||||
.bin_attrs = bin_attrs,
|
||||
};
|
||||
|
||||
con->features_attr = (struct device_attribute) {
|
||||
|
@ -743,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
|
|||
},
|
||||
.show = amdgpu_ras_sysfs_features_read,
|
||||
};
|
||||
|
||||
con->badpages_attr = (struct bin_attribute) {
|
||||
.attr = {
|
||||
.name = "gpu_vram_bad_pages",
|
||||
.mode = S_IRUGO,
|
||||
},
|
||||
.size = 0,
|
||||
.private = NULL,
|
||||
.read = amdgpu_ras_sysfs_badpages_read,
|
||||
};
|
||||
|
||||
sysfs_attr_init(attrs[0]);
|
||||
sysfs_bin_attr_init(bin_attrs[0]);
|
||||
|
||||
return sysfs_create_group(&adev->dev->kobj, &group);
|
||||
}
|
||||
|
@ -755,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
|
|||
&con->features_attr.attr,
|
||||
NULL
|
||||
};
|
||||
struct bin_attribute *bin_attrs[] = {
|
||||
&con->badpages_attr,
|
||||
NULL
|
||||
};
|
||||
struct attribute_group group = {
|
||||
.name = "ras",
|
||||
.attrs = attrs,
|
||||
.bin_attrs = bin_attrs,
|
||||
};
|
||||
|
||||
sysfs_remove_group(&adev->dev->kobj, &group);
|
||||
|
@ -1089,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
|
|||
/* ih end */
|
||||
|
||||
/* recovery begin */
|
||||
|
||||
/* return 0 on success.
|
||||
* caller need free bps.
|
||||
*/
|
||||
static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
|
||||
struct ras_badpage **bps, unsigned int *count)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_err_handler_data *data;
|
||||
int i = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (!con || !con->eh_data || !bps || !count)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&con->recovery_lock);
|
||||
data = con->eh_data;
|
||||
if (!data || data->count == 0) {
|
||||
*bps = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
|
||||
if (!*bps) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (; i < data->count; i++) {
|
||||
(*bps)[i] = (struct ras_badpage){
|
||||
.bp = data->bps[i].bp,
|
||||
.size = AMDGPU_GPU_PAGE_SIZE,
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
if (data->last_reserved <= i)
|
||||
(*bps)[i].flags = 1;
|
||||
else if (data->bps[i].bo == NULL)
|
||||
(*bps)[i].flags = 2;
|
||||
}
|
||||
|
||||
*count = data->count;
|
||||
out:
|
||||
mutex_unlock(&con->recovery_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void amdgpu_ras_do_recovery(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_ras *ras =
|
||||
|
@ -1340,6 +1505,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
|
|||
}
|
||||
/* recovery end */
|
||||
|
||||
/* return 0 if ras will reset gpu and repost.*/
|
||||
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
|
||||
unsigned int block)
|
||||
{
|
||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (!ras)
|
||||
return -EINVAL;
|
||||
|
||||
ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* check hardware's ras ability which will be saved in hw_supported.
|
||||
* if hardware does not support ras, we can skip some ras initialization and
|
||||
|
@ -1415,8 +1593,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* do some init work after IP late init as dependence */
|
||||
void amdgpu_ras_post_init(struct amdgpu_device *adev)
|
||||
/* do some init work after IP late init as dependence.
|
||||
* and it runs in resume/gpu reset/booting up cases.
|
||||
*/
|
||||
void amdgpu_ras_resume(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
struct ras_manager *obj, *tmp;
|
||||
|
@ -1444,6 +1624,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
|
||||
con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
|
||||
/* setup ras obj state as disabled.
|
||||
* for init_by_vbios case.
|
||||
* if we want to enable ras, just enable it in a normal way.
|
||||
* If we want do disable it, need setup ras obj as enabled,
|
||||
* then issue another TA disable cmd.
|
||||
* See feature_enable_on_boot
|
||||
*/
|
||||
amdgpu_ras_disable_all_features(adev, 1);
|
||||
amdgpu_ras_reset_gpu(adev, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_ras_suspend(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (!con)
|
||||
return;
|
||||
|
||||
amdgpu_ras_disable_all_features(adev, 0);
|
||||
/* Make sure all ras objects are disabled. */
|
||||
if (con->features)
|
||||
amdgpu_ras_disable_all_features(adev, 1);
|
||||
}
|
||||
|
||||
/* do some fini work before IP fini as dependence */
|
||||
|
|
|
@ -93,6 +93,7 @@ struct amdgpu_ras {
|
|||
struct dentry *ent;
|
||||
/* sysfs */
|
||||
struct device_attribute features_attr;
|
||||
struct bin_attribute badpages_attr;
|
||||
/* block array */
|
||||
struct ras_manager *objs;
|
||||
|
||||
|
@ -175,6 +176,12 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
|
|||
return ras && (ras->supported & (1 << block));
|
||||
}
|
||||
|
||||
int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
|
||||
unsigned int block);
|
||||
|
||||
void amdgpu_ras_resume(struct amdgpu_device *adev);
|
||||
void amdgpu_ras_suspend(struct amdgpu_device *adev);
|
||||
|
||||
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
|
||||
bool is_ce);
|
||||
|
||||
|
@ -187,13 +194,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
|
|||
static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
|
||||
bool is_baco)
|
||||
{
|
||||
/* remove me when gpu reset works on vega20 A1. */
|
||||
#if 0
|
||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
|
||||
schedule_work(&ras->recovery_work);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -255,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
|
|||
|
||||
/* called in ip_init and ip_fini */
|
||||
int amdgpu_ras_init(struct amdgpu_device *adev);
|
||||
void amdgpu_ras_post_init(struct amdgpu_device *adev);
|
||||
int amdgpu_ras_fini(struct amdgpu_device *adev);
|
||||
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
|
||||
|
||||
|
|
|
@ -114,6 +114,7 @@ struct amdgpu_ring_funcs {
|
|||
uint32_t align_mask;
|
||||
u32 nop;
|
||||
bool support_64bit_ptrs;
|
||||
bool no_user_fence;
|
||||
unsigned vmhub;
|
||||
unsigned extra_dw;
|
||||
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include <linux/pagemap.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/hmm.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_object.h"
|
||||
#include "amdgpu_trace.h"
|
||||
|
@ -703,100 +704,177 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
|
|||
/*
|
||||
* TTM backend functions.
|
||||
*/
|
||||
struct amdgpu_ttm_gup_task_list {
|
||||
struct list_head list;
|
||||
struct task_struct *task;
|
||||
};
|
||||
|
||||
struct amdgpu_ttm_tt {
|
||||
struct ttm_dma_tt ttm;
|
||||
u64 offset;
|
||||
uint64_t userptr;
|
||||
struct task_struct *usertask;
|
||||
uint32_t userflags;
|
||||
spinlock_t guptasklock;
|
||||
struct list_head guptasks;
|
||||
atomic_t mmu_invalidations;
|
||||
uint32_t last_set_pages;
|
||||
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
|
||||
struct hmm_range *ranges;
|
||||
int nr_ranges;
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
|
||||
* pointer to memory
|
||||
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
|
||||
* memory and start HMM tracking CPU page table update
|
||||
*
|
||||
* Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
|
||||
* This provides a wrapper around the get_user_pages() call to provide
|
||||
* device accessible pages that back user memory.
|
||||
* Calling function must call amdgpu_ttm_tt_get_user_pages_done() once and only
|
||||
* once afterwards to stop HMM tracking
|
||||
*/
|
||||
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
|
||||
|
||||
/* Support Userptr pages cross max 16 vmas */
|
||||
#define MAX_NR_VMAS (16)
|
||||
|
||||
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
struct mm_struct *mm = gtt->usertask->mm;
|
||||
unsigned int flags = 0;
|
||||
unsigned pinned = 0;
|
||||
int r;
|
||||
unsigned long start = gtt->userptr;
|
||||
unsigned long end = start + ttm->num_pages * PAGE_SIZE;
|
||||
struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
|
||||
struct hmm_range *ranges;
|
||||
unsigned long nr_pages, i;
|
||||
uint64_t *pfns, f;
|
||||
int r = 0;
|
||||
|
||||
if (!mm) /* Happens during process shutdown */
|
||||
return -ESRCH;
|
||||
|
||||
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
|
||||
flags |= FOLL_WRITE;
|
||||
|
||||
down_read(&mm->mmap_sem);
|
||||
|
||||
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
|
||||
/*
|
||||
* check that we only use anonymous memory to prevent problems
|
||||
* with writeback
|
||||
*/
|
||||
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
|
||||
struct vm_area_struct *vma;
|
||||
/* user pages may cross multiple VMAs */
|
||||
gtt->nr_ranges = 0;
|
||||
do {
|
||||
unsigned long vm_start;
|
||||
|
||||
vma = find_vma(mm, gtt->userptr);
|
||||
if (!vma || vma->vm_file || vma->vm_end < end) {
|
||||
up_read(&mm->mmap_sem);
|
||||
return -EPERM;
|
||||
if (gtt->nr_ranges >= MAX_NR_VMAS) {
|
||||
DRM_ERROR("Too many VMAs in userptr range\n");
|
||||
r = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vm_start = vma ? vma->vm_end : start;
|
||||
vma = find_vma(mm, vm_start);
|
||||
if (unlikely(!vma || vm_start < vma->vm_start)) {
|
||||
r = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
vmas[gtt->nr_ranges++] = vma;
|
||||
} while (end > vma->vm_end);
|
||||
|
||||
DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
|
||||
start, gtt->nr_ranges, ttm->num_pages);
|
||||
|
||||
if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
|
||||
vmas[0]->vm_file)) {
|
||||
r = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* loop enough times using contiguous pages of memory */
|
||||
do {
|
||||
unsigned num_pages = ttm->num_pages - pinned;
|
||||
uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
|
||||
struct page **p = pages + pinned;
|
||||
struct amdgpu_ttm_gup_task_list guptask;
|
||||
ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
|
||||
if (unlikely(!ranges)) {
|
||||
r = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
guptask.task = current;
|
||||
spin_lock(>t->guptasklock);
|
||||
list_add(&guptask.list, >t->guptasks);
|
||||
spin_unlock(>t->guptasklock);
|
||||
pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
|
||||
if (unlikely(!pfns)) {
|
||||
r = -ENOMEM;
|
||||
goto out_free_ranges;
|
||||
}
|
||||
|
||||
if (mm == current->mm)
|
||||
r = get_user_pages(userptr, num_pages, flags, p, NULL);
|
||||
else
|
||||
r = get_user_pages_remote(gtt->usertask,
|
||||
mm, userptr, num_pages,
|
||||
flags, p, NULL, NULL);
|
||||
for (i = 0; i < gtt->nr_ranges; i++)
|
||||
amdgpu_hmm_init_range(&ranges[i]);
|
||||
|
||||
spin_lock(>t->guptasklock);
|
||||
list_del(&guptask.list);
|
||||
spin_unlock(>t->guptasklock);
|
||||
f = ranges[0].flags[HMM_PFN_VALID];
|
||||
f |= amdgpu_ttm_tt_is_readonly(ttm) ?
|
||||
0 : ranges[0].flags[HMM_PFN_WRITE];
|
||||
memset64(pfns, f, ttm->num_pages);
|
||||
|
||||
if (r < 0)
|
||||
goto release_pages;
|
||||
for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
|
||||
ranges[i].vma = vmas[i];
|
||||
ranges[i].start = max(start, vmas[i]->vm_start);
|
||||
ranges[i].end = min(end, vmas[i]->vm_end);
|
||||
ranges[i].pfns = pfns + nr_pages;
|
||||
nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
|
||||
|
||||
pinned += r;
|
||||
r = hmm_vma_fault(&ranges[i], true);
|
||||
if (unlikely(r))
|
||||
break;
|
||||
}
|
||||
if (unlikely(r)) {
|
||||
while (i--)
|
||||
hmm_vma_range_done(&ranges[i]);
|
||||
|
||||
} while (pinned < ttm->num_pages);
|
||||
goto out_free_pfns;
|
||||
}
|
||||
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
for (i = 0; i < ttm->num_pages; i++) {
|
||||
pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
|
||||
if (!pages[i]) {
|
||||
pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
|
||||
i, pfns[i]);
|
||||
goto out_invalid_pfn;
|
||||
}
|
||||
}
|
||||
gtt->ranges = ranges;
|
||||
|
||||
return 0;
|
||||
|
||||
release_pages:
|
||||
release_pages(pages, pinned);
|
||||
out_free_pfns:
|
||||
kvfree(pfns);
|
||||
out_free_ranges:
|
||||
kvfree(ranges);
|
||||
out:
|
||||
up_read(&mm->mmap_sem);
|
||||
|
||||
return r;
|
||||
|
||||
out_invalid_pfn:
|
||||
for (i = 0; i < gtt->nr_ranges; i++)
|
||||
hmm_vma_range_done(&ranges[i]);
|
||||
kvfree(pfns);
|
||||
kvfree(ranges);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
|
||||
* Check if the pages backing this ttm range have been invalidated
|
||||
*
|
||||
* Returns: true if pages are still valid
|
||||
*/
|
||||
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
bool r = false;
|
||||
int i;
|
||||
|
||||
if (!gtt || !gtt->userptr)
|
||||
return false;
|
||||
|
||||
DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
|
||||
gtt->userptr, gtt->nr_ranges, ttm->num_pages);
|
||||
|
||||
WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
|
||||
"No user pages to check\n");
|
||||
|
||||
if (gtt->ranges) {
|
||||
for (i = 0; i < gtt->nr_ranges; i++)
|
||||
r |= hmm_vma_range_done(>t->ranges[i]);
|
||||
kvfree(gtt->ranges[0].pfns);
|
||||
kvfree(gtt->ranges);
|
||||
gtt->ranges = NULL;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
|
||||
|
@ -807,39 +885,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
|||
*/
|
||||
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
unsigned i;
|
||||
|
||||
gtt->last_set_pages = atomic_read(>t->mmu_invalidations);
|
||||
for (i = 0; i < ttm->num_pages; ++i) {
|
||||
if (ttm->pages[i])
|
||||
put_page(ttm->pages[i]);
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < ttm->num_pages; ++i)
|
||||
ttm->pages[i] = pages ? pages[i] : NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
|
||||
*
|
||||
* Called while unpinning userptr pages
|
||||
*/
|
||||
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ttm->num_pages; ++i) {
|
||||
struct page *page = ttm->pages[i];
|
||||
|
||||
if (!page)
|
||||
continue;
|
||||
|
||||
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
|
||||
set_page_dirty(page);
|
||||
|
||||
mark_page_accessed(page);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -901,10 +950,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
|
|||
/* unmap the pages mapped to the device */
|
||||
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
|
||||
|
||||
/* mark the pages as dirty */
|
||||
amdgpu_ttm_tt_mark_user_pages(ttm);
|
||||
|
||||
sg_free_table(ttm->sg);
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
|
||||
if (gtt->ranges &&
|
||||
ttm->pages[0] == hmm_pfn_to_page(>t->ranges[0],
|
||||
gtt->ranges[0].pfns[0]))
|
||||
WARN_ONCE(1, "Missing get_user_page_done\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
|
||||
|
@ -1254,11 +1307,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
|
|||
gtt->usertask = current->group_leader;
|
||||
get_task_struct(gtt->usertask);
|
||||
|
||||
spin_lock_init(>t->guptasklock);
|
||||
INIT_LIST_HEAD(>t->guptasks);
|
||||
atomic_set(>t->mmu_invalidations, 0);
|
||||
gtt->last_set_pages = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1287,7 +1335,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
|
|||
unsigned long end)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
struct amdgpu_ttm_gup_task_list *entry;
|
||||
unsigned long size;
|
||||
|
||||
if (gtt == NULL || !gtt->userptr)
|
||||
|
@ -1300,48 +1347,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
|
|||
if (gtt->userptr > end || gtt->userptr + size <= start)
|
||||
return false;
|
||||
|
||||
/* Search the lists of tasks that hold this mapping and see
|
||||
* if current is one of them. If it is return false.
|
||||
*/
|
||||
spin_lock(>t->guptasklock);
|
||||
list_for_each_entry(entry, >t->guptasks, list) {
|
||||
if (entry->task == current) {
|
||||
spin_unlock(>t->guptasklock);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
spin_unlock(>t->guptasklock);
|
||||
|
||||
atomic_inc(>t->mmu_invalidations);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
|
||||
* amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
|
||||
*/
|
||||
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
|
||||
int *last_invalidated)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
int prev_invalidated = *last_invalidated;
|
||||
|
||||
*last_invalidated = atomic_read(>t->mmu_invalidations);
|
||||
return prev_invalidated != *last_invalidated;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
|
||||
* been invalidated since the last time they've been set?
|
||||
*/
|
||||
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
|
||||
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
|
||||
{
|
||||
struct amdgpu_ttm_tt *gtt = (void *)ttm;
|
||||
|
||||
if (gtt == NULL || !gtt->userptr)
|
||||
return false;
|
||||
|
||||
return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1753,44 +1772,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
|||
|
||||
/* Initialize various on-chip memory pools */
|
||||
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
|
||||
adev->gds.mem.total_size);
|
||||
adev->gds.gds_size);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed initializing GDS heap.\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
|
||||
4, AMDGPU_GEM_DOMAIN_GDS,
|
||||
&adev->gds.gds_gfx_bo, NULL, NULL);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
|
||||
adev->gds.gws.total_size);
|
||||
adev->gds.gws_size);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed initializing gws heap.\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
|
||||
1, AMDGPU_GEM_DOMAIN_GWS,
|
||||
&adev->gds.gws_gfx_bo, NULL, NULL);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
|
||||
adev->gds.oa.total_size);
|
||||
adev->gds.oa_size);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed initializing oa heap.\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
|
||||
1, AMDGPU_GEM_DOMAIN_OA,
|
||||
&adev->gds.oa_gfx_bo, NULL, NULL);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* Register debugfs entries for amdgpu_ttm */
|
||||
r = amdgpu_ttm_debugfs_init(adev);
|
||||
if (r) {
|
||||
|
|
|
@ -101,9 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
|
|||
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
|
||||
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
|
||||
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
|
||||
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
|
||||
#else
|
||||
static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
|
||||
{
|
||||
return -EPERM;
|
||||
}
|
||||
static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
|
||||
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
|
||||
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
|
||||
uint32_t flags);
|
||||
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
|
||||
|
@ -112,7 +124,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
|
|||
unsigned long end);
|
||||
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
|
||||
int *last_invalidated);
|
||||
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
|
||||
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
|
||||
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
|
||||
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
|
||||
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
|
||||
|
|
|
@ -313,6 +313,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
|
|||
return AMDGPU_FW_LOAD_DIRECT;
|
||||
}
|
||||
|
||||
/*
 * FW_VERSION_ATTR - define a sysfs device attribute that prints one
 * firmware version field.
 *
 * @name:  attribute name; also names the generated show_<name>() function
 * @mode:  sysfs file mode
 * @field: member path inside struct amdgpu_device to print
 *
 * The generated attribute has no store callback and prints the field as
 * "0x%08x" followed by a newline.
 */
#define FW_VERSION_ATTR(name, mode, field)				\
static ssize_t show_##name(struct device *dev,				\
			   struct device_attribute *attr,		\
			   char *buf)					\
{									\
	struct drm_device *drm = dev_get_drvdata(dev);			\
	struct amdgpu_device *gpu = drm->dev_private;			\
									\
	return snprintf(buf, PAGE_SIZE, "0x%08x\n", gpu->field);	\
}									\
static DEVICE_ATTR(name, mode, show_##name, NULL)
|
||||
|
||||
FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
|
||||
FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
|
||||
FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
|
||||
FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
|
||||
FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
|
||||
FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
|
||||
FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
|
||||
FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
|
||||
FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
|
||||
FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
|
||||
FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
|
||||
FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
|
||||
FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
|
||||
FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
|
||||
FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
|
||||
FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
|
||||
FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
|
||||
FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
|
||||
FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
|
||||
FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
|
||||
FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
|
||||
|
||||
static struct attribute *fw_attrs[] = {
|
||||
&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
|
||||
&dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr,
|
||||
&dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr,
|
||||
&dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr,
|
||||
&dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr,
|
||||
&dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr,
|
||||
&dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr,
|
||||
&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
|
||||
&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
|
||||
&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
|
||||
&dev_attr_dmcu_fw_version.attr, NULL
|
||||
};
|
||||
|
||||
static const struct attribute_group fw_attr_group = {
|
||||
.name = "fw_version",
|
||||
.attrs = fw_attrs
|
||||
};
|
||||
|
||||
int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
|
||||
{
|
||||
return sysfs_create_group(&adev->dev->kobj, &fw_attr_group);
|
||||
}
|
||||
|
||||
void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
|
||||
}
|
||||
|
||||
static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
|
||||
struct amdgpu_firmware_info *ucode,
|
||||
uint64_t mc_addr, void *kptr)
|
||||
|
|
|
@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
|
|||
|
||||
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
|
||||
int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
|
||||
int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);
|
||||
void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
|
||||
void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
|
||||
|
||||
enum amdgpu_firmware_load_type
|
||||
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
|
||||
|
|
|
@ -212,132 +212,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
struct dpg_pause_state *new_state)
|
||||
{
|
||||
int ret_code;
|
||||
uint32_t reg_data = 0;
|
||||
uint32_t reg_data2 = 0;
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
/* pause/unpause if state is changed */
|
||||
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
|
||||
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
||||
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
||||
new_state->fw_based, new_state->jpeg);
|
||||
|
||||
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
||||
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
|
||||
|
||||
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
|
||||
ret_code = 0;
|
||||
|
||||
if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
|
||||
if (!ret_code) {
|
||||
/* pause DPG non-jpeg */
|
||||
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
|
||||
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
|
||||
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
|
||||
|
||||
/* Restore */
|
||||
ring = &adev->vcn.ring_enc[0];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
|
||||
|
||||
ring = &adev->vcn.ring_enc[1];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
|
||||
|
||||
ring = &adev->vcn.ring_dec;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
||||
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
}
|
||||
} else {
|
||||
/* unpause dpg non-jpeg, no need to wait */
|
||||
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
}
|
||||
adev->vcn.pause_state.fw_based = new_state->fw_based;
|
||||
}
|
||||
|
||||
/* pause/unpause if state is changed */
|
||||
if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
|
||||
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
||||
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
||||
new_state->fw_based, new_state->jpeg);
|
||||
|
||||
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
||||
(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
|
||||
|
||||
if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
|
||||
ret_code = 0;
|
||||
|
||||
if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
|
||||
if (!ret_code) {
|
||||
/* Make sure JPRG Snoop is disabled before sending the pause */
|
||||
reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
|
||||
reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
|
||||
|
||||
/* pause DPG jpeg */
|
||||
reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
|
||||
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
|
||||
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
|
||||
|
||||
/* Restore */
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
||||
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
|
||||
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
|
||||
lower_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
|
||||
upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
||||
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
||||
|
||||
ring = &adev->vcn.ring_dec;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
||||
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
}
|
||||
} else {
|
||||
/* unpause dpg jpeg, no need to wait */
|
||||
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
}
|
||||
adev->vcn.pause_state.jpeg = new_state->jpeg;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_device *adev =
|
||||
|
@ -362,7 +236,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
|
|||
else
|
||||
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
|
||||
|
||||
amdgpu_vcn_pause_dpg_mode(adev, &new_state);
|
||||
adev->vcn.pause_dpg_mode(adev, &new_state);
|
||||
}
|
||||
|
||||
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
|
||||
|
@ -417,7 +291,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
|
|||
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
|
||||
new_state.jpeg = VCN_DPG_STATE__PAUSE;
|
||||
|
||||
amdgpu_vcn_pause_dpg_mode(adev, &new_state);
|
||||
adev->vcn.pause_dpg_mode(adev, &new_state);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -45,6 +45,27 @@
|
|||
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
|
||||
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
|
||||
|
||||
/*
 * RREG32_SOC15_DPG_MODE - read a register through the UVD DPG LMA window.
 *
 * Writes @mask to mmUVD_DPG_LMA_MASK, programs mmUVD_DPG_LMA_CTL with the
 * mask-enable bit, the register's offset and @sram_sel, then evaluates to
 * the value read from mmUVD_DPG_LMA_DATA.
 *
 * Expects a local variable named adev in the calling scope.  @reg must be
 * a bare register name because it is token-pasted to form <reg>_BASE_IDX.
 * Value arguments are parenthesized so that expressions can be passed.
 */
#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel)			\
	({	WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, (mask));		\
		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL,			\
			UVD_DPG_LMA_CTL__MASK_EN_MASK |				\
			((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)) \
			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) |		\
			((sram_sel) << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT));	\
		RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA);			\
	})
|
||||
|
||||
/*
 * WREG32_SOC15_DPG_MODE - write a register through the UVD DPG LMA window.
 *
 * Writes @value to mmUVD_DPG_LMA_DATA and @mask to mmUVD_DPG_LMA_MASK, then
 * programs mmUVD_DPG_LMA_CTL with the read/write bit, the register's offset
 * and @sram_sel.
 *
 * Expects a local variable named adev in the calling scope.  @reg must be
 * a bare register name because it is token-pasted to form <reg>_BASE_IDX.
 * Value arguments are parenthesized so that expressions can be passed.
 */
#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel)		\
	do {									\
		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, (value));		\
		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, (mask));		\
		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL,			\
			UVD_DPG_LMA_CTL__READ_WRITE_MASK |			\
			((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)) \
			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) |		\
			((sram_sel) << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT));	\
	} while (0)
|
||||
|
||||
enum engine_status_constants {
|
||||
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
|
||||
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
|
||||
|
@ -81,6 +102,8 @@ struct amdgpu_vcn {
|
|||
unsigned num_enc_rings;
|
||||
enum amd_powergating_state cur_state;
|
||||
struct dpg_pause_state pause_state;
|
||||
int (*pause_dpg_mode)(struct amdgpu_device *adev,
|
||||
struct dpg_pause_state *new_state);
|
||||
};
|
||||
|
||||
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
|
||||
|
|
|
@ -426,3 +426,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
|
|||
return clk;
|
||||
}
|
||||
|
||||
void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (virt->ops && virt->ops->init_reg_access_mode)
|
||||
virt->ops->init_reg_access_mode(adev);
|
||||
}
|
||||
|
||||
bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
|
||||
{
|
||||
bool ret = false;
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (amdgpu_sriov_vf(adev)
|
||||
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
|
||||
ret = true;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
|
||||
{
|
||||
bool ret = false;
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (amdgpu_sriov_vf(adev)
|
||||
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
|
||||
&& !(amdgpu_sriov_runtime(adev)))
|
||||
ret = true;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
|
||||
{
|
||||
bool ret = false;
|
||||
struct amdgpu_virt *virt = &adev->virt;
|
||||
|
||||
if (amdgpu_sriov_vf(adev)
|
||||
&& (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
|
||||
ret = true;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer {
|
|||
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
|
||||
};
|
||||
|
||||
/*
 * According to the fw feature, some new reg access modes are supported.
 * These are bit flags; the amdgpu_virt_support_*() helpers test them against
 * amdgpu_virt.reg_access_mode.
 * NOTE(review): "SEETING" in AMDGPU_VIRT_REG_SKIP_SEETING looks like a typo
 * for "SETTING"; the name is kept because other code references it.
 */
|
||||
#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */
|
||||
#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */
|
||||
#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
|
||||
#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */
|
||||
|
||||
/**
|
||||
* struct amdgpu_virt_ops - amdgpu device virt operations
|
||||
*/
|
||||
|
@ -59,6 +65,7 @@ struct amdgpu_virt_ops {
|
|||
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
|
||||
int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
|
||||
int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
|
||||
void (*init_reg_access_mode)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -258,6 +265,7 @@ struct amdgpu_virt {
|
|||
uint32_t gim_feature;
|
||||
/* protect DPM events to GIM */
|
||||
struct mutex dpm_mutex;
|
||||
uint32_t reg_access_mode;
|
||||
};
|
||||
|
||||
#define amdgpu_sriov_enabled(adev) \
|
||||
|
@ -307,4 +315,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
|
|||
uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
|
||||
uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
|
||||
|
||||
void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
|
||||
bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
|
||||
bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
|
||||
bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
|
|||
return &hive->device_list;
|
||||
}
|
||||
|
||||
/**
|
||||
* DOC: AMDGPU XGMI Support
|
||||
*
|
||||
* XGMI is a high speed interconnect that joins multiple GPU cards
|
||||
* into a homogeneous memory space that is organized by a collective
|
||||
* hive ID and individual node IDs, both of which are 64-bit numbers.
|
||||
*
|
||||
* The file xgmi_device_id contains the unique per GPU device ID and
|
||||
* is stored in the /sys/class/drm/card${cardno}/device/ directory.
|
||||
*
|
||||
* Inside the device directory a sub-directory 'xgmi_hive_info' is
|
||||
* created which contains the hive ID and the list of nodes.
|
||||
*
|
||||
* The hive ID is stored in:
|
||||
* /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
|
||||
*
|
||||
* The node information is stored in numbered directories:
|
||||
* /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
|
||||
*
|
||||
* Each device has its own xgmi_hive_info directory with a mirror
|
||||
* set of node sub-directories.
|
||||
*
|
||||
* The XGMI memory space is built by contiguously adding the power of
|
||||
* two padded VRAM space from each node to each other.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
|
@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
|
|||
/* Each psp need to set the latest topology */
|
||||
ret = psp_xgmi_set_topology_info(&adev->psp,
|
||||
hive->number_devices,
|
||||
&hive->topology_info);
|
||||
&adev->psp.xgmi_context.top_info);
|
||||
if (ret)
|
||||
dev_err(adev->dev,
|
||||
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
|
||||
|
@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
|
|||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
|
||||
struct amdgpu_device *peer_adev)
|
||||
{
|
||||
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
|
||||
int i;
|
||||
|
||||
for (i = 0 ; i < top->num_nodes; ++i)
|
||||
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
|
||||
return top->nodes[i].num_hops;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
||||
{
|
||||
struct psp_xgmi_topology_info *hive_topology;
|
||||
struct psp_xgmi_topology_info *top_info;
|
||||
struct amdgpu_hive_info *hive;
|
||||
struct amdgpu_xgmi *entry;
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
|
@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
|||
goto exit;
|
||||
}
|
||||
|
||||
hive_topology = &hive->topology_info;
|
||||
top_info = &adev->psp.xgmi_context.top_info;
|
||||
|
||||
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
|
||||
list_for_each_entry(entry, &hive->device_list, head)
|
||||
hive_topology->nodes[count++].node_id = entry->node_id;
|
||||
top_info->nodes[count++].node_id = entry->node_id;
|
||||
top_info->num_nodes = count;
|
||||
hive->number_devices = count;
|
||||
|
||||
/* Each psp need to get the latest topology */
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
|
||||
/* update node list for other device in the hive */
|
||||
if (tmp_adev != adev) {
|
||||
top_info = &tmp_adev->psp.xgmi_context.top_info;
|
||||
top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id;
|
||||
top_info->num_nodes = count;
|
||||
}
|
||||
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||
if (ret)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* get latest topology info for each device from psp */
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
|
||||
&tmp_adev->psp.xgmi_context.top_info);
|
||||
if (ret) {
|
||||
dev_err(tmp_adev->dev,
|
||||
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
|
||||
tmp_adev->gmc.xgmi.node_id,
|
||||
tmp_adev->gmc.xgmi.hive_id, ret);
|
||||
/* To do : continue with some node failed or disable the whole hive */
|
||||
break;
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
|
||||
|
||||
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
exit:
|
||||
if (!ret)
|
||||
dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
|
||||
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
|
||||
|
@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
|||
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
|
||||
ret);
|
||||
|
||||
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
struct amdgpu_hive_info {
|
||||
uint64_t hive_id;
|
||||
struct list_head device_list;
|
||||
struct psp_xgmi_topology_info topology_info;
|
||||
int number_devices;
|
||||
struct mutex hive_lock, reset_lock;
|
||||
struct kobject *kobj;
|
||||
|
@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
|
|||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
|
||||
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
|
||||
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
|
||||
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
|
||||
struct amdgpu_device *peer_adev);
|
||||
|
||||
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
|
||||
struct amdgpu_device *bo_adev)
|
||||
|
|
|
@ -1804,6 +1804,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev)
|
|||
return false;
|
||||
}
|
||||
|
||||
static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
|
||||
{
|
||||
uint64_t nak_r, nak_g;
|
||||
|
||||
/* Get the number of NAKs received and generated */
|
||||
nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
|
||||
nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
|
||||
|
||||
/* Add the total number of NAKs, i.e the number of replays */
|
||||
return (nak_r + nak_g);
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs cik_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &cik_read_disabled_bios,
|
||||
|
@ -1821,6 +1833,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
|
|||
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||
.get_pcie_usage = &cik_get_pcie_usage,
|
||||
.need_reset_on_init = &cik_need_reset_on_init,
|
||||
.get_pcie_replay_count = &cik_get_pcie_replay_count,
|
||||
};
|
||||
|
||||
static int cik_common_early_init(void *handle)
|
||||
|
|
|
@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
|
|||
*flags |= AMD_CG_SUPPORT_DF_MGCG;
|
||||
}
|
||||
|
||||
/*
 * Hold counter assignment per gpu struct.
 * config_assign_mask[i] stores the (masked) perfmon config assigned to DF
 * counter slot i; the assign/release helpers treat a value of 0 as "free".
 * NOTE(review): the helpers reach this struct through
 * container_of(adev, struct df_v3_6_event_mask, gpu), which is only valid if
 * adev really is the embedded 'gpu' member of an allocated instance of this
 * struct - no such allocation is visible here; confirm.
 */
|
||||
struct df_v3_6_event_mask {
|
||||
struct amdgpu_device gpu;
|
||||
uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS];
|
||||
};
|
||||
|
||||
/* get assigned df perfmon ctr as int */
|
||||
static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
|
||||
uint64_t config,
|
||||
int *counter)
|
||||
{
|
||||
struct df_v3_6_event_mask *mask;
|
||||
int i;
|
||||
|
||||
mask = container_of(adev, struct df_v3_6_event_mask, gpu);
|
||||
|
||||
for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
|
||||
if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) {
|
||||
*counter = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* get address based on counter assignment */
|
||||
static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
|
||||
uint64_t config,
|
||||
int is_ctrl,
|
||||
uint32_t *lo_base_addr,
|
||||
uint32_t *hi_base_addr)
|
||||
{
|
||||
|
||||
int target_cntr = -1;
|
||||
|
||||
df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
|
||||
|
||||
if (target_cntr < 0)
|
||||
return;
|
||||
|
||||
switch (target_cntr) {
|
||||
|
||||
case 0:
|
||||
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
|
||||
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
|
||||
break;
|
||||
case 1:
|
||||
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
|
||||
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
|
||||
break;
|
||||
case 2:
|
||||
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
|
||||
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
|
||||
break;
|
||||
case 3:
|
||||
*lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
|
||||
*hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
 * Get the count-register (read) addresses of the counter assigned to @config.
 * Thin wrapper around df_v3_6_pmc_get_addr() with is_ctrl == 0.
 */
static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
					  uint64_t config,
					  uint32_t *lo_base_addr,
					  uint32_t *hi_base_addr)
{
	/* 0 selects the count registers rather than the control registers */
	const int want_ctrl_regs = 0;

	df_v3_6_pmc_get_addr(adev, config, want_ctrl_regs,
			     lo_base_addr, hi_base_addr);
}
|
||||
|
||||
/*
 * Get control-register settings for the counter assigned to @config, i.e.
 * the register addresses and, optionally, the values to program.
 *
 * @adev: amdgpu device
 * @config: perfmon config (event / instance / unit mask fields)
 * @lo_base_addr: receives the low control register address
 * @hi_base_addr: receives the high control register address
 * @lo_val: receives the low register value; may be NULL
 * @hi_val: receives the high register value; may be NULL
 *
 * When either value pointer is NULL only the addresses are looked up. When
 * either address comes back as 0 an error is logged and the values are left
 * unwritten.
 */
static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
					  uint64_t config,
					  uint32_t *lo_base_addr,
					  uint32_t *hi_base_addr,
					  uint32_t *lo_val,
					  uint32_t *hi_val)
{
	uint32_t event, inst, umask;
	uint32_t sel_5_0, sel_13_0, sel_13_12, sel_11_8, sel_7_0;

	df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);

	/* address-only lookup: the caller did not ask for register values */
	if (!lo_val || !hi_val)
		return;

	if (!*lo_base_addr || !*hi_base_addr) {
		DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x",
			  *lo_base_addr, *hi_base_addr);
		return;
	}

	event = GET_EVENT(config);
	inst = GET_INSTANCE(config);
	umask = GET_UNITMASK(config);

	/* 14-bit selector: instance in bits 13:6, low six event bits in 5:0 */
	sel_5_0 = event & 0x3FUL;
	sel_13_0 = (inst << 6) + sel_5_0;

	/* split the selector into the pieces each register carries */
	sel_13_12 = (sel_13_0 & 0x03000UL) >> 12;
	sel_11_8 = (sel_13_0 & 0x0F00UL) >> 8;
	sel_7_0 = sel_13_0 & 0x0FFUL;

	*lo_val = (sel_7_0 & 0xFFUL) | ((umask & 0x0FUL) << 8);
	*hi_val = sel_11_8 | (sel_13_12 << 29);
}
|
||||
|
||||
/* assign df performance counters for read */
|
||||
static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
|
||||
uint64_t config,
|
||||
int *is_assigned)
|
||||
{
|
||||
|
||||
struct df_v3_6_event_mask *mask;
|
||||
int i, target_cntr;
|
||||
|
||||
target_cntr = -1;
|
||||
|
||||
*is_assigned = 0;
|
||||
|
||||
df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
|
||||
|
||||
if (target_cntr >= 0) {
|
||||
*is_assigned = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
mask = container_of(adev, struct df_v3_6_event_mask, gpu);
|
||||
|
||||
for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
|
||||
if (mask->config_assign_mask[i] == 0ULL) {
|
||||
mask->config_assign_mask[i] = config & 0x0FFFFFFUL;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
/* release performance counter */
|
||||
static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
|
||||
uint64_t config)
|
||||
{
|
||||
|
||||
struct df_v3_6_event_mask *mask;
|
||||
int target_cntr;
|
||||
|
||||
target_cntr = -1;
|
||||
|
||||
df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
|
||||
|
||||
mask = container_of(adev, struct df_v3_6_event_mask, gpu);
|
||||
|
||||
if (target_cntr >= 0)
|
||||
mask->config_assign_mask[target_cntr] = 0ULL;
|
||||
|
||||
}
|
||||
|
||||
/*
 * get xgmi link counters via programmable data fabric (df) counters (max 4)
 * using cake tx event.
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 * @count -> receives the 64-bit counter value (hi register in bits 63:32,
 *           lo register in bits 31:0); left untouched when no counter is
 *           assigned to the link
 *
 */
static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev,
				       int instance,
				       uint64_t *count)
{
	/*
	 * Start from 0 so the "not assigned" check below never reads an
	 * indeterminate value if the address lookup writes nothing.
	 */
	uint32_t lo_base_addr = 0, hi_base_addr = 0;
	uint32_t lo_val, hi_val;
	uint64_t config;

	config = GET_INSTANCE_CONFIG(instance);

	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

	if ((lo_base_addr == 0) || (hi_base_addr == 0))
		return;

	lo_val = RREG32_PCIE(lo_base_addr);
	hi_val = RREG32_PCIE(hi_base_addr);

	*count = ((uint64_t)hi_val << 32) | (uint64_t)lo_val;
}
|
||||
|
||||
/*
 * reset xgmi link counters
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 *
 * Writes 0 to both count registers of the counter assigned to the link;
 * does nothing when no counter is assigned.
 */
static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev,
					 int instance)
{
	/*
	 * Start from 0 so the "not assigned" check below never reads an
	 * indeterminate value if the address lookup writes nothing.
	 */
	uint32_t lo_base_addr = 0, hi_base_addr = 0;
	uint64_t config;

	/* same config word the other link-counter helpers build */
	config = GET_INSTANCE_CONFIG(instance);

	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

	if ((lo_base_addr == 0) || (hi_base_addr == 0))
		return;

	WREG32_PCIE(lo_base_addr, 0UL);
	WREG32_PCIE(hi_base_addr, 0UL);
}
|
||||
|
||||
/*
 * add xgmi link counters
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 *
 * Assigns a df perfmon counter to the link's tx event and programs its
 * control registers. Returns -EINVAL for an instance other than 0 or 1, the
 * error from counter assignment if that fails, and 0 otherwise (including
 * when the event already had a counter, in which case nothing is written).
 */
static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev,
				      int instance)
{
	uint32_t ctl_lo_addr, ctl_hi_addr, ctl_lo, ctl_hi;
	uint64_t config;
	int rc, already_assigned;

	if (instance < 0 || instance > 1)
		return -EINVAL;

	config = GET_INSTANCE_CONFIG(instance);

	rc = df_v3_6_pmc_assign_cntr(adev, config, &already_assigned);
	if (rc)
		return rc;

	/* counter was set up by an earlier call: leave the hardware alone */
	if (already_assigned)
		return rc;

	df_v3_6_pmc_get_ctrl_settings(adev, config,
				      &ctl_lo_addr, &ctl_hi_addr,
				      &ctl_lo, &ctl_hi);

	WREG32_PCIE(ctl_lo_addr, ctl_lo);
	WREG32_PCIE(ctl_hi_addr, ctl_hi);

	return rc;
}
|
||||
|
||||
|
||||
/*
 * start xgmi link counters
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 * @is_enable -> non-zero: assign the event to a df perfmon counter;
 *               zero: resume an already assigned counter by setting bit 22
 *               of its low control register
 *
 * Returns -EINVAL for an instance other than 0 or 1 or when resuming a link
 * that has no counter assigned, the result of the add operation when
 * @is_enable is set, and 0 on a successful resume.
 */
static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev,
					int instance,
					int is_enable)
{
	/*
	 * Start from 0 so the "not assigned" check below never reads an
	 * indeterminate value if the address lookup writes nothing.
	 */
	uint32_t lo_base_addr = 0, hi_base_addr = 0;
	uint32_t lo_val;
	uint64_t config;

	if (instance < 0 || instance > 1)
		return -EINVAL;

	if (is_enable)
		return df_v3_6_add_xgmi_link_cntr(adev, instance);

	config = GET_INSTANCE_CONFIG(instance);

	/* address-only lookup: no register values requested */
	df_v3_6_pmc_get_ctrl_settings(adev,
				      config,
				      &lo_base_addr,
				      &hi_base_addr,
				      NULL,
				      NULL);

	if (lo_base_addr == 0)
		return -EINVAL;

	lo_val = RREG32_PCIE(lo_base_addr);

	WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));

	return 0;
}
|
||||
|
||||
/*
 * stop xgmi link counters
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 * @is_disable -> non-zero: reset the counter and unassign the event;
 *                zero: pause the counter by clearing bit 22 of its low
 *                control register
 *
 * Returns -EINVAL when pausing a link that has no counter assigned,
 * 0 otherwise.
 */
static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev,
				       int instance,
				       int is_disable)
{
	/*
	 * Start from 0 so the "not assigned" check below never reads an
	 * indeterminate value if the address lookup writes nothing.
	 */
	uint32_t lo_base_addr = 0, hi_base_addr = 0;
	uint32_t lo_val;
	uint64_t config;

	config = GET_INSTANCE_CONFIG(instance);

	if (is_disable) {
		df_v3_6_reset_xgmi_link_cntr(adev, instance);
		df_v3_6_pmc_release_cntr(adev, config);
		return 0;
	}

	/* address-only lookup: no register values requested */
	df_v3_6_pmc_get_ctrl_settings(adev,
				      config,
				      &lo_base_addr,
				      &hi_base_addr,
				      NULL,
				      NULL);

	if ((lo_base_addr == 0) || (hi_base_addr == 0))
		return -EINVAL;

	lo_val = RREG32_PCIE(lo_base_addr);

	WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));

	return 0;
}
|
||||
|
||||
static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
|
||||
int is_enable)
|
||||
{
|
||||
int xgmi_tx_link, ret = 0;
|
||||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA20:
|
||||
xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
|
||||
: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
|
||||
|
||||
if (xgmi_tx_link >= 0)
|
||||
ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link,
|
||||
is_enable);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
|
||||
int is_disable)
|
||||
{
|
||||
int xgmi_tx_link, ret = 0;
|
||||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA20:
|
||||
xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
|
||||
: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
|
||||
|
||||
if (xgmi_tx_link >= 0) {
|
||||
ret = df_v3_6_stop_xgmi_link_cntr(adev,
|
||||
xgmi_tx_link,
|
||||
is_disable);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
|
||||
uint64_t config,
|
||||
uint64_t *count)
|
||||
{
|
||||
|
||||
int xgmi_tx_link;
|
||||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA20:
|
||||
xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
|
||||
: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
|
||||
|
||||
if (xgmi_tx_link >= 0) {
|
||||
df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link);
|
||||
df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count);
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const struct amdgpu_df_funcs df_v3_6_funcs = {
|
||||
.init = df_v3_6_init,
|
||||
.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
|
||||
|
@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
|
|||
.update_medium_grain_clock_gating =
|
||||
df_v3_6_update_medium_grain_clock_gating,
|
||||
.get_clockgating_state = df_v3_6_get_clockgating_state,
|
||||
.pmc_start = df_v3_6_pmc_start,
|
||||
.pmc_stop = df_v3_6_pmc_stop,
|
||||
.pmc_get_count = df_v3_6_pmc_get_count
|
||||
};
|
||||
|
|
|
@ -35,6 +35,23 @@ enum DF_V3_6_MGCG {
|
|||
DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
|
||||
};
|
||||
|
||||
/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
#define AMDGPU_DF_MAX_COUNTERS		4

/*
 * get flags from df perfmon config:
 * bits 7:0 event, bits 15:8 instance, bits 23:16 unit mask.
 * Macro arguments are parenthesized so expressions such as (a | b) can be
 * passed without operator-precedence surprises.
 */
#define GET_EVENT(x)		((x) & 0xFFUL)
#define GET_INSTANCE(x)		(((x) >> 8) & 0xFFUL)
#define GET_UNITMASK(x)		(((x) >> 16) & 0xFFUL)
/* config word for xgmi tx link x: event 0x7, instance 0x46 + x, unit mask 0x2 */
#define GET_INSTANCE_CONFIG(x)	(0ULL | (0x07ULL) \
					| ((0x046ULL + (x)) << 8) \
					| (0x02 << 16))

/* df event conf macros */
#define IS_DF_XGMI_0_TX(x)	(GET_EVENT(x) == 0x7 \
		&& GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2)
#define IS_DF_XGMI_1_TX(x)	(GET_EVENT(x) == 0x7 \
		&& GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2)
|
||||
|
||||
extern const struct amdgpu_df_funcs df_v3_6_funcs;
|
||||
|
||||
#endif
|
||||
|
|
|
@ -4493,12 +4493,8 @@ static int gfx_v7_0_sw_init(void *handle)
|
|||
|
||||
static int gfx_v7_0_sw_fini(void *handle)
|
||||
{
|
||||
int i;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
||||
|
@ -5070,30 +5066,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
|
|||
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
|
||||
{
|
||||
/* init asci gds info */
|
||||
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
|
||||
adev->gds.gws.total_size = 64;
|
||||
adev->gds.oa.total_size = 16;
|
||||
adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
|
||||
adev->gds.gws_size = 64;
|
||||
adev->gds.oa_size = 16;
|
||||
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
|
||||
|
||||
if (adev->gds.mem.total_size == 64 * 1024) {
|
||||
adev->gds.mem.gfx_partition_size = 4096;
|
||||
adev->gds.mem.cs_partition_size = 4096;
|
||||
|
||||
adev->gds.gws.gfx_partition_size = 4;
|
||||
adev->gds.gws.cs_partition_size = 4;
|
||||
|
||||
adev->gds.oa.gfx_partition_size = 4;
|
||||
adev->gds.oa.cs_partition_size = 1;
|
||||
} else {
|
||||
adev->gds.mem.gfx_partition_size = 1024;
|
||||
adev->gds.mem.cs_partition_size = 1024;
|
||||
|
||||
adev->gds.gws.gfx_partition_size = 16;
|
||||
adev->gds.gws.cs_partition_size = 16;
|
||||
|
||||
adev->gds.oa.gfx_partition_size = 4;
|
||||
adev->gds.oa.cs_partition_size = 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -2057,12 +2057,8 @@ static int gfx_v8_0_sw_init(void *handle)
|
|||
|
||||
static int gfx_v8_0_sw_fini(void *handle)
|
||||
{
|
||||
int i;
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
||||
|
@ -7010,30 +7006,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
|
|||
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
|
||||
{
|
||||
/* init asci gds info */
|
||||
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
|
||||
adev->gds.gws.total_size = 64;
|
||||
adev->gds.oa.total_size = 16;
|
||||
adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
|
||||
adev->gds.gws_size = 64;
|
||||
adev->gds.oa_size = 16;
|
||||
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
|
||||
|
||||
if (adev->gds.mem.total_size == 64 * 1024) {
|
||||
adev->gds.mem.gfx_partition_size = 4096;
|
||||
adev->gds.mem.cs_partition_size = 4096;
|
||||
|
||||
adev->gds.gws.gfx_partition_size = 4;
|
||||
adev->gds.gws.cs_partition_size = 4;
|
||||
|
||||
adev->gds.oa.gfx_partition_size = 4;
|
||||
adev->gds.oa.cs_partition_size = 1;
|
||||
} else {
|
||||
adev->gds.mem.gfx_partition_size = 1024;
|
||||
adev->gds.mem.cs_partition_size = 1024;
|
||||
|
||||
adev->gds.gws.gfx_partition_size = 16;
|
||||
adev->gds.gws.cs_partition_size = 16;
|
||||
|
||||
adev->gds.oa.gfx_partition_size = 4;
|
||||
adev->gds.oa.cs_partition_size = 4;
|
||||
}
|
||||
}
|
||||
|
||||
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "vega10_enum.h"
|
||||
#include "hdp/hdp_4_0_offset.h"
|
||||
|
||||
#include "soc15.h"
|
||||
#include "soc15_common.h"
|
||||
#include "clearstate_gfx9.h"
|
||||
#include "v9_structs.h"
|
||||
|
@ -307,12 +308,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
|
|||
{
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10:
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_gc_9_0,
|
||||
ARRAY_SIZE(golden_settings_gc_9_0));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_gc_9_0_vg10,
|
||||
ARRAY_SIZE(golden_settings_gc_9_0_vg10));
|
||||
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_gc_9_0,
|
||||
ARRAY_SIZE(golden_settings_gc_9_0));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_gc_9_0_vg10,
|
||||
ARRAY_SIZE(golden_settings_gc_9_0_vg10));
|
||||
}
|
||||
break;
|
||||
case CHIP_VEGA12:
|
||||
soc15_program_register_sequence(adev,
|
||||
|
@ -1458,8 +1461,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
|
|||
|
||||
/* GDS reserve memory: 64 bytes alignment */
|
||||
adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
|
||||
adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
|
||||
adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
|
||||
adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
|
||||
adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
|
||||
adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
|
||||
|
||||
|
@ -1567,7 +1569,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
|
|||
|
||||
gfx_v9_0_write_data_to_reg(ring, 0, false,
|
||||
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
|
||||
(adev->gds.mem.total_size +
|
||||
(adev->gds.gds_size +
|
||||
adev->gfx.ngg.gds_reserve_size));
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
|
||||
|
@ -1781,10 +1783,6 @@ static int gfx_v9_0_sw_fini(void *handle)
|
|||
kfree(ras_if);
|
||||
}
|
||||
|
||||
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
|
||||
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
|
@ -1834,7 +1832,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
|
|||
else
|
||||
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
|
||||
|
||||
WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
|
||||
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
|
||||
}
|
||||
|
||||
static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
|
||||
|
@ -1902,8 +1900,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
|
|||
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
|
||||
soc15_grbm_select(adev, 0, 0, 0, i);
|
||||
/* CP and shaders */
|
||||
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
|
||||
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
|
||||
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
|
||||
WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
|
||||
}
|
||||
soc15_grbm_select(adev, 0, 0, 0, 0);
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
|
@ -1914,7 +1912,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
|
|||
u32 tmp;
|
||||
int i;
|
||||
|
||||
WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
|
||||
WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
|
||||
|
||||
gfx_v9_0_tiling_mode_table_init(adev);
|
||||
|
||||
|
@ -1957,7 +1955,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
|
|||
*/
|
||||
gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
|
||||
|
||||
WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
|
||||
WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
|
||||
(adev->gfx.config.sc_prim_fifo_size_frontend <<
|
||||
PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
|
||||
(adev->gfx.config.sc_prim_fifo_size_backend <<
|
||||
|
@ -2024,11 +2022,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
|
|||
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
|
||||
{
|
||||
/* csib */
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
|
||||
adev->gfx.rlc.clear_state_gpu_addr >> 32);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
|
||||
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
|
||||
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
|
||||
adev->gfx.rlc.clear_state_size);
|
||||
}
|
||||
|
||||
|
@ -2498,7 +2496,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
|
|||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
adev->gfx.gfx_ring[i].sched.ready = false;
|
||||
}
|
||||
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
|
||||
udelay(50);
|
||||
}
|
||||
|
||||
|
@ -2696,9 +2694,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
|
|||
int i;
|
||||
|
||||
if (enable) {
|
||||
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
|
||||
} else {
|
||||
WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
|
||||
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
adev->gfx.compute_ring[i].sched.ready = false;
|
||||
|
@ -2759,9 +2757,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
|
|||
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
|
||||
tmp &= 0xffffff00;
|
||||
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
|
||||
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
|
||||
WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
|
||||
tmp |= 0x80;
|
||||
WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
|
||||
WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
|
||||
}
|
||||
|
||||
static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
|
||||
|
@ -2979,67 +2977,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
|
|||
/* disable wptr polling */
|
||||
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
|
||||
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
|
||||
mqd->cp_hqd_eop_base_addr_lo);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
|
||||
mqd->cp_hqd_eop_base_addr_hi);
|
||||
|
||||
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
|
||||
mqd->cp_hqd_eop_control);
|
||||
|
||||
/* enable doorbell? */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
|
||||
mqd->cp_hqd_pq_doorbell_control);
|
||||
|
||||
/* disable the queue if it's active */
|
||||
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
|
||||
for (j = 0; j < adev->usec_timeout; j++) {
|
||||
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
|
||||
mqd->cp_hqd_dequeue_request);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
|
||||
mqd->cp_hqd_pq_rptr);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
|
||||
mqd->cp_hqd_pq_wptr_lo);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
|
||||
mqd->cp_hqd_pq_wptr_hi);
|
||||
}
|
||||
|
||||
/* set the pointer to the MQD */
|
||||
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
|
||||
mqd->cp_mqd_base_addr_lo);
|
||||
WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
|
||||
mqd->cp_mqd_base_addr_hi);
|
||||
|
||||
/* set MQD vmid to 0 */
|
||||
WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
|
||||
mqd->cp_mqd_control);
|
||||
|
||||
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
|
||||
mqd->cp_hqd_pq_base_lo);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
|
||||
mqd->cp_hqd_pq_base_hi);
|
||||
|
||||
/* set up the HQD, this is similar to CP_RB0_CNTL */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
|
||||
mqd->cp_hqd_pq_control);
|
||||
|
||||
/* set the wb address whether it's enabled or not */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
|
||||
mqd->cp_hqd_pq_rptr_report_addr_lo);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
|
||||
mqd->cp_hqd_pq_rptr_report_addr_hi);
|
||||
|
||||
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
|
||||
mqd->cp_hqd_pq_wptr_poll_addr_lo);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
|
||||
mqd->cp_hqd_pq_wptr_poll_addr_hi);
|
||||
|
||||
/* enable the doorbell if requested */
|
||||
|
@ -3054,19 +3052,19 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
|
|||
mqd->cp_hqd_pq_doorbell_control);
|
||||
|
||||
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
|
||||
mqd->cp_hqd_pq_wptr_lo);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
|
||||
mqd->cp_hqd_pq_wptr_hi);
|
||||
|
||||
/* set the vmid for the queue */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
|
||||
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
|
||||
mqd->cp_hqd_persistent_state);
|
||||
|
||||
/* activate the queue */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
|
||||
mqd->cp_hqd_active);
|
||||
|
||||
if (ring->use_doorbell)
|
||||
|
@ -3083,7 +3081,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
|
|||
/* disable the queue if it's active */
|
||||
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
|
||||
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
|
||||
|
||||
for (j = 0; j < adev->usec_timeout; j++) {
|
||||
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
|
||||
|
@ -3095,21 +3093,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
|
|||
DRM_DEBUG("KIQ dequeue request failed.\n");
|
||||
|
||||
/* Manual disable if dequeue request times out */
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
|
||||
}
|
||||
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
|
||||
0);
|
||||
}
|
||||
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -3529,6 +3527,241 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
|
|||
(1 << (oa_size + oa_base)) - (1 << oa_base));
|
||||
}
|
||||
|
||||
/*
 * Shader dwords copied verbatim into the indirect buffer by
 * gfx_v9_0_do_edc_gpr_workarounds() for the VGPR dispatch.  The values are
 * opaque data here; keep their order exactly as listed.
 */
static const u32 vgpr_init_compute_shader[] = {
	0xb07c0000, 0xbe8000ff, 0x000000f8, 0xbf110800,
	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280,
	0x7e080280, 0x7e0a0280, 0x7e0c0280, 0x7e0e0280,
	0x80808800, 0xbe803200, 0xbf84fff5, 0xbf9c0000,
	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e,
	0x10020288, 0xb8810904, 0xb7814000, 0xd1196a01,
	0x00000301, 0xbe800087, 0xbefc00c1, 0xd89c4000,
	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff,
	0x00000800, 0x80808100, 0xbf84fff8, 0x7e020280,
	0xbf810000, 0x00000000,
};
|
||||
|
||||
/*
 * Shader dwords copied verbatim into the indirect buffer by
 * gfx_v9_0_do_edc_gpr_workarounds() for the SGPR dispatch.  The values are
 * opaque data here; keep their order exactly as listed.
 */
static const u32 sgpr_init_compute_shader[] = {
	0xb07c0000, 0xbe8000ff, 0x0000005f, 0xbee50080,
	0xbe812c65, 0xbe822c65, 0xbe832c65, 0xbe842c65,
	0xbe852c65, 0xb77c0005, 0x80808500, 0xbf84fff8,
	0xbe800080, 0xbf810000,
};
|
||||
|
||||
static const struct soc15_reg_entry vgpr_init_regs[] = {
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
|
||||
};
|
||||
|
||||
static const struct soc15_reg_entry sgpr_init_regs[] = {
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
|
||||
};
|
||||
|
||||
static const struct soc15_reg_entry sec_ded_counter_registers[] = {
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
|
||||
{ SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
|
||||
};
|
||||
|
||||
static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
|
||||
struct amdgpu_ib ib;
|
||||
struct dma_fence *f = NULL;
|
||||
int r, i, j;
|
||||
unsigned total_size, vgpr_offset, sgpr_offset;
|
||||
u64 gpu_addr;
|
||||
|
||||
/* only support when RAS is enabled */
|
||||
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
|
||||
return 0;
|
||||
|
||||
/* bail if the compute ring is not ready */
|
||||
if (!ring->sched.ready)
|
||||
return 0;
|
||||
|
||||
total_size =
|
||||
((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
|
||||
total_size +=
|
||||
((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
|
||||
total_size = ALIGN(total_size, 256);
|
||||
vgpr_offset = total_size;
|
||||
total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
|
||||
sgpr_offset = total_size;
|
||||
total_size += sizeof(sgpr_init_compute_shader);
|
||||
|
||||
/* allocate an indirect buffer to put the commands in */
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, total_size, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* load the compute shaders */
|
||||
for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
|
||||
ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
|
||||
ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
|
||||
|
||||
/* init the ib length to 0 */
|
||||
ib.length_dw = 0;
|
||||
|
||||
/* VGPR */
|
||||
/* write the register state for the compute dispatch */
|
||||
for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
|
||||
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
|
||||
ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
|
||||
- PACKET3_SET_SH_REG_START;
|
||||
ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
|
||||
}
|
||||
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
|
||||
gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
|
||||
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
|
||||
ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
|
||||
- PACKET3_SET_SH_REG_START;
|
||||
ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
|
||||
ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
|
||||
|
||||
/* write dispatch packet */
|
||||
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
|
||||
ib.ptr[ib.length_dw++] = 128; /* x */
|
||||
ib.ptr[ib.length_dw++] = 1; /* y */
|
||||
ib.ptr[ib.length_dw++] = 1; /* z */
|
||||
ib.ptr[ib.length_dw++] =
|
||||
REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
|
||||
|
||||
/* write CS partial flush packet */
|
||||
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
|
||||
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
|
||||
|
||||
/* SGPR */
|
||||
/* write the register state for the compute dispatch */
|
||||
for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
|
||||
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
|
||||
ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
|
||||
- PACKET3_SET_SH_REG_START;
|
||||
ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
|
||||
}
|
||||
/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
|
||||
gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
|
||||
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
|
||||
ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
|
||||
- PACKET3_SET_SH_REG_START;
|
||||
ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
|
||||
ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
|
||||
|
||||
/* write dispatch packet */
|
||||
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
|
||||
ib.ptr[ib.length_dw++] = 128; /* x */
|
||||
ib.ptr[ib.length_dw++] = 1; /* y */
|
||||
ib.ptr[ib.length_dw++] = 1; /* z */
|
||||
ib.ptr[ib.length_dw++] =
|
||||
REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
|
||||
|
||||
/* write CS partial flush packet */
|
||||
ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
|
||||
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
|
||||
|
||||
/* shedule the ib on the ring */
|
||||
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* wait for the GPU to finish processing the IB */
|
||||
r = dma_fence_wait(f, false);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* read back registers to clear the counters */
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
for (j = 0; j < 16; j++) {
|
||||
gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
|
||||
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
|
||||
gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
|
||||
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
|
||||
gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
|
||||
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
|
||||
gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
|
||||
for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
|
||||
RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
|
||||
}
|
||||
WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
fail:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
dma_fence_put(f);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int gfx_v9_0_early_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
@ -3570,8 +3803,31 @@ static int gfx_v9_0_ecc_late_init(void *handle)
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (*ras_if)
|
||||
/* requires IBs so do in late init after IB pool is initialized */
|
||||
r = gfx_v9_0_do_edc_gpr_workarounds(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* handle resume path. */
|
||||
if (*ras_if) {
|
||||
/* resend ras TA enable cmd during resume.
|
||||
* prepare to handle failure.
|
||||
*/
|
||||
ih_info.head = **ras_if;
|
||||
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||
if (r) {
|
||||
if (r == -EAGAIN) {
|
||||
/* request a gpu reset. will run again. */
|
||||
amdgpu_ras_request_reset_on_boot(adev,
|
||||
AMDGPU_RAS_BLOCK__GFX);
|
||||
return 0;
|
||||
}
|
||||
/* fail to enable ras, cleanup all. */
|
||||
goto irq;
|
||||
}
|
||||
/* enable successfully. continue. */
|
||||
goto resume;
|
||||
}
|
||||
|
||||
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
||||
if (!*ras_if)
|
||||
|
@ -3580,8 +3836,14 @@ static int gfx_v9_0_ecc_late_init(void *handle)
|
|||
**ras_if = ras_block;
|
||||
|
||||
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||
if (r)
|
||||
if (r) {
|
||||
if (r == -EAGAIN) {
|
||||
amdgpu_ras_request_reset_on_boot(adev,
|
||||
AMDGPU_RAS_BLOCK__GFX);
|
||||
r = 0;
|
||||
}
|
||||
goto feature;
|
||||
}
|
||||
|
||||
ih_info.head = **ras_if;
|
||||
fs_info.head = **ras_if;
|
||||
|
@ -3614,7 +3876,7 @@ static int gfx_v9_0_ecc_late_init(void *handle)
|
|||
feature:
|
||||
kfree(*ras_if);
|
||||
*ras_if = NULL;
|
||||
return -EINVAL;
|
||||
return r;
|
||||
}
|
||||
|
||||
static int gfx_v9_0_late_init(void *handle)
|
||||
|
@ -4319,8 +4581,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
|
|||
mutex_lock(&adev->srbm_mutex);
|
||||
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
|
||||
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
|
||||
WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
|
||||
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
|
||||
|
||||
soc15_grbm_select(adev, 0, 0, 0, 0);
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
|
@ -5056,13 +5318,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
|
|||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
adev->gds.mem.total_size = 0x10000;
|
||||
adev->gds.gds_size = 0x10000;
|
||||
break;
|
||||
case CHIP_RAVEN:
|
||||
adev->gds.mem.total_size = 0x1000;
|
||||
adev->gds.gds_size = 0x1000;
|
||||
break;
|
||||
default:
|
||||
adev->gds.mem.total_size = 0x10000;
|
||||
adev->gds.gds_size = 0x10000;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -5086,28 +5348,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
|
|||
break;
|
||||
}
|
||||
|
||||
adev->gds.gws.total_size = 64;
|
||||
adev->gds.oa.total_size = 16;
|
||||
|
||||
if (adev->gds.mem.total_size == 64 * 1024) {
|
||||
adev->gds.mem.gfx_partition_size = 4096;
|
||||
adev->gds.mem.cs_partition_size = 4096;
|
||||
|
||||
adev->gds.gws.gfx_partition_size = 4;
|
||||
adev->gds.gws.cs_partition_size = 4;
|
||||
|
||||
adev->gds.oa.gfx_partition_size = 4;
|
||||
adev->gds.oa.cs_partition_size = 1;
|
||||
} else {
|
||||
adev->gds.mem.gfx_partition_size = 1024;
|
||||
adev->gds.mem.cs_partition_size = 1024;
|
||||
|
||||
adev->gds.gws.gfx_partition_size = 16;
|
||||
adev->gds.gws.cs_partition_size = 16;
|
||||
|
||||
adev->gds.oa.gfx_partition_size = 4;
|
||||
adev->gds.oa.cs_partition_size = 4;
|
||||
}
|
||||
adev->gds.gws_size = 64;
|
||||
adev->gds.oa_size = 16;
|
||||
}
|
||||
|
||||
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
|
||||
|
|
|
@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
|
|||
uint64_t value;
|
||||
|
||||
/* Program the AGP BAR */
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
|
||||
|
||||
/* Program the system aperture low logical page number. */
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
|
||||
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
|
||||
|
||||
if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
|
||||
|
@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
|
|||
* workaround that increase system aperture high address (add 1)
|
||||
* to get rid of the VM fault and hardware hang.
|
||||
*/
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||
max((adev->gmc.fb_end >> 18) + 0x1,
|
||||
adev->gmc.agp_end >> 18));
|
||||
else
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
|
||||
|
||||
/* Set default page address. */
|
||||
|
@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
|
|||
MTYPE, MTYPE_UC);/* XXX for emulation. */
|
||||
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
|
||||
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
}
|
||||
|
||||
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
||||
|
@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
|
|||
* VF copy registers so vbios post doesn't program them, for
|
||||
* SRIOV driver need to program them
|
||||
*/
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE,
|
||||
adev->gmc.vram_start >> 24);
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP,
|
||||
adev->gmc.vram_end >> 24);
|
||||
}
|
||||
|
||||
|
@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
|
|||
MC_VM_MX_L1_TLB_CNTL,
|
||||
ENABLE_ADVANCED_DRIVER_MODEL,
|
||||
0);
|
||||
WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
|
||||
/* Setup L2 cache */
|
||||
WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
|
||||
|
|
|
@ -289,7 +289,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
|
|||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Load the GDDR MC ucode into the hw (CIK).
|
||||
* Load the GDDR MC ucode into the hw (VI).
|
||||
* Returns 0 on success, error on failure.
|
||||
*/
|
||||
static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
|
||||
|
@ -443,7 +443,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
|
|||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Set the location of vram, gart, and AGP in the GPU's
|
||||
* physical address space (CIK).
|
||||
* physical address space (VI).
|
||||
*/
|
||||
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
|
||||
{
|
||||
|
@ -515,7 +515,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
|
|||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Look up the amount of vram, vram width, and decide how to place
|
||||
* vram and gart within the GPU's physical address space (CIK).
|
||||
* vram and gart within the GPU's physical address space (VI).
|
||||
* Returns 0 for success.
|
||||
*/
|
||||
static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
|
||||
|
@ -630,7 +630,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
|
|||
* @adev: amdgpu_device pointer
|
||||
* @vmid: vm instance to flush
|
||||
*
|
||||
* Flush the TLB for the requested page table (CIK).
|
||||
* Flush the TLB for the requested page table (VI).
|
||||
*/
|
||||
static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
|
||||
uint32_t vmid, uint32_t flush_type)
|
||||
|
@ -800,7 +800,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
|
|||
* This sets up the TLBs, programs the page tables for VMID0,
|
||||
* sets up the hw for VMIDs 1-15 which are allocated on
|
||||
* demand, and sets up the global locations for the LDS, GDS,
|
||||
* and GPUVM for FSA64 clients (CIK).
|
||||
* and GPUVM for FSA64 clients (VI).
|
||||
* Returns 0 for success, errors for failure.
|
||||
*/
|
||||
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
|
||||
|
@ -948,7 +948,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
|
|||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* This disables all VM page table (CIK).
|
||||
 * This disables all VM page tables (VI).
|
||||
*/
|
||||
static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
|
||||
{
|
||||
|
@ -978,7 +978,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
|
|||
* @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
|
||||
* @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
|
||||
*
|
||||
* Print human readable fault information (CIK).
|
||||
* Print human readable fault information (VI).
|
||||
*/
|
||||
static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
|
||||
u32 addr, u32 mc_client, unsigned pasid)
|
||||
|
|
|
@ -687,8 +687,25 @@ static int gmc_v9_0_ecc_late_init(void *handle)
|
|||
return 0;
|
||||
}
|
||||
/* handle resume path. */
|
||||
if (*ras_if)
|
||||
if (*ras_if) {
|
||||
/* resend ras TA enable cmd during resume.
|
||||
* prepare to handle failure.
|
||||
*/
|
||||
ih_info.head = **ras_if;
|
||||
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||
if (r) {
|
||||
if (r == -EAGAIN) {
|
||||
/* request a gpu reset. will run again. */
|
||||
amdgpu_ras_request_reset_on_boot(adev,
|
||||
AMDGPU_RAS_BLOCK__UMC);
|
||||
return 0;
|
||||
}
|
||||
/* fail to enable ras, cleanup all. */
|
||||
goto irq;
|
||||
}
|
||||
/* enable successfully. continue. */
|
||||
goto resume;
|
||||
}
|
||||
|
||||
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
||||
if (!*ras_if)
|
||||
|
@ -697,8 +714,14 @@ static int gmc_v9_0_ecc_late_init(void *handle)
|
|||
**ras_if = ras_block;
|
||||
|
||||
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||
if (r)
|
||||
if (r) {
|
||||
if (r == -EAGAIN) {
|
||||
amdgpu_ras_request_reset_on_boot(adev,
|
||||
AMDGPU_RAS_BLOCK__UMC);
|
||||
r = 0;
|
||||
}
|
||||
goto feature;
|
||||
}
|
||||
|
||||
ih_info.head = **ras_if;
|
||||
fs_info.head = **ras_if;
|
||||
|
@ -731,7 +754,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
|
|||
feature:
|
||||
kfree(*ras_if);
|
||||
*ras_if = NULL;
|
||||
return -EINVAL;
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1100,6 +1123,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
|
|||
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10:
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
break;
|
||||
/* fall through */
|
||||
case CHIP_VEGA20:
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_mmhub_1_0_0,
|
||||
|
@ -1164,6 +1190,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
|
|||
tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
|
||||
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
|
||||
|
||||
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
|
||||
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
|
||||
|
||||
/* After HDP is initialized, flush HDP.*/
|
||||
adev->nbio_funcs->hdp_flush(adev, NULL);
|
||||
|
||||
|
|
|
@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
|
|||
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
|
||||
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
|
||||
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
return;
|
||||
|
||||
/* Set default page address. */
|
||||
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
|
||||
adev->vm_manager.vram_base_offset;
|
||||
|
@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
|
|||
{
|
||||
uint32_t tmp;
|
||||
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
return;
|
||||
|
||||
/* Setup L2 cache */
|
||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
|
||||
|
@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
|
|||
|
||||
static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
|
||||
{
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
return;
|
||||
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
|
||||
0XFFFFFFFF);
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
|
||||
|
@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
|
|||
0);
|
||||
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
|
||||
|
||||
/* Setup L2 cache */
|
||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
|
||||
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||
/* Setup L2 cache */
|
||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
|
||||
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
|
|||
void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
|
||||
{
|
||||
u32 tmp;
|
||||
|
||||
if (amdgpu_virt_support_skip_setting(adev))
|
||||
return;
|
||||
|
||||
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
|
||||
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
|
||||
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "nbio/nbio_6_1_sh_mask.h"
|
||||
#include "gc/gc_9_0_offset.h"
|
||||
#include "gc/gc_9_0_sh_mask.h"
|
||||
#include "mp/mp_9_0_offset.h"
|
||||
#include "soc15.h"
|
||||
#include "vega10_ih.h"
|
||||
#include "soc15_common.h"
|
||||
|
@ -343,7 +344,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
|
|||
|
||||
/* Trigger recovery for world switch failure if no TDR */
|
||||
if (amdgpu_device_should_recover_gpu(adev)
|
||||
&& amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT)
|
||||
&& adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)
|
||||
amdgpu_device_gpu_recover(adev, NULL);
|
||||
}
|
||||
|
||||
|
@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
|
|||
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
|
||||
}
|
||||
|
||||
static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
|
||||
uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
|
||||
|
||||
adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY;
|
||||
|
||||
if (rlc_fw_ver >= 0x5d)
|
||||
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC;
|
||||
|
||||
if (sos_fw_ver >= 0x80455)
|
||||
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH;
|
||||
|
||||
if (sos_fw_ver >= 0x8045b)
|
||||
adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING;
|
||||
}
|
||||
|
||||
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
|
||||
.req_full_gpu = xgpu_ai_request_full_gpu_access,
|
||||
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
|
||||
|
@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
|
|||
.trans_msg = xgpu_ai_mailbox_trans_msg,
|
||||
.get_pp_clk = xgpu_ai_get_pp_clk,
|
||||
.force_dpm_level = xgpu_ai_force_dpm_level,
|
||||
.init_reg_access_mode = xgpu_ai_init_reg_access_mode,
|
||||
};
|
||||
|
|
|
@ -29,9 +29,18 @@
|
|||
#include "nbio/nbio_7_0_sh_mask.h"
|
||||
#include "nbio/nbio_7_0_smn.h"
|
||||
#include "vega10_enum.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
|
||||
|
||||
static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
|
||||
{
|
||||
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
|
||||
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
|
||||
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
|
||||
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
|
||||
}
|
||||
|
||||
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
|
||||
|
@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
|
|||
struct amdgpu_ring *ring)
|
||||
{
|
||||
if (!ring || !ring->funcs->emit_wreg)
|
||||
WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
|
||||
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
else
|
||||
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
|
||||
NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
|
||||
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
}
|
||||
|
||||
static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
|
||||
|
@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
|
|||
.ih_control = nbio_v7_0_ih_control,
|
||||
.init_registers = nbio_v7_0_init_registers,
|
||||
.detect_hw_virt = nbio_v7_0_detect_hw_virt,
|
||||
.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
|
||||
};
|
||||
|
|
|
@ -27,9 +27,18 @@
|
|||
#include "nbio/nbio_7_4_offset.h"
|
||||
#include "nbio/nbio_7_4_sh_mask.h"
|
||||
#include "nbio/nbio_7_4_0_smn.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
|
||||
|
||||
static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
|
||||
{
|
||||
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
|
||||
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
|
||||
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
|
||||
adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
|
||||
}
|
||||
|
||||
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
|
||||
|
@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
|
|||
struct amdgpu_ring *ring)
|
||||
{
|
||||
if (!ring || !ring->funcs->emit_wreg)
|
||||
WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
|
||||
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
else
|
||||
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
|
||||
NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
|
||||
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
|
||||
}
|
||||
|
||||
static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
|
||||
|
@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
|
|||
.ih_control = nbio_v7_4_ih_control,
|
||||
.init_registers = nbio_v7_4_init_registers,
|
||||
.detect_hw_virt = nbio_v7_4_detect_hw_virt,
|
||||
.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
|
||||
};
|
||||
|
|
|
@ -94,6 +94,7 @@ enum psp_gfx_cmd_id
|
|||
GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
|
||||
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
|
||||
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
|
||||
GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
|
||||
};
|
||||
|
||||
|
||||
|
@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw
|
|||
enum psp_gfx_fw_type fw_type; /* FW type */
|
||||
};
|
||||
|
||||
/*
 * Command to set up register programming: carries one register id and the
 * value to go with it.
 * NOTE(review): presumably this layout is consumed outside the driver, so
 * the field order must stay as is -- confirm before reordering.
 */
struct psp_gfx_cmd_reg_prog {
	uint32_t	reg_value;	/* value for the register */
	uint32_t	reg_id;		/* identifies which register to program */
};
|
||||
|
||||
/* All GFX ring buffer commands. */
|
||||
union psp_gfx_commands
|
||||
{
|
||||
|
@ -226,6 +233,7 @@ union psp_gfx_commands
|
|||
struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
|
||||
struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
|
||||
struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
|
||||
struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -50,6 +50,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
|
|||
|
||||
static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
|
||||
|
||||
static bool psp_v3_1_support_vmr_ring(struct psp_context *psp);
|
||||
static int psp_v3_1_ring_stop(struct psp_context *psp,
|
||||
enum psp_ring_type ring_type);
|
||||
|
||||
static int psp_v3_1_init_microcode(struct psp_context *psp)
|
||||
{
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
|
@ -296,27 +300,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
|
|||
|
||||
psp_v3_1_reroute_ih(psp);
|
||||
|
||||
/* Write low address of the ring to C2PMSG_69 */
|
||||
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
|
||||
/* Write high address of the ring to C2PMSG_70 */
|
||||
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
|
||||
/* Write size of ring to C2PMSG_71 */
|
||||
psp_ring_reg = ring->ring_size;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
|
||||
/* Write the ring initialization command to C2PMSG_64 */
|
||||
psp_ring_reg = ring_type;
|
||||
psp_ring_reg = psp_ring_reg << 16;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||
if (psp_v3_1_support_vmr_ring(psp)) {
|
||||
ret = psp_v3_1_ring_stop(psp, ring_type);
|
||||
if (ret) {
|
||||
DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* there might be handshake issue with hardware which needs delay */
|
||||
mdelay(20);
|
||||
/* Write low address of the ring to C2PMSG_102 */
|
||||
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
|
||||
/* Write high address of the ring to C2PMSG_103 */
|
||||
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
|
||||
/* No size initialization for sriov */
|
||||
/* Write the ring initialization command to C2PMSG_101 */
|
||||
psp_ring_reg = ring_type;
|
||||
psp_ring_reg = psp_ring_reg << 16;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
|
||||
|
||||
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
||||
0x80000000, 0x8000FFFF, false);
|
||||
/* there might be hardware handshake issue which needs delay */
|
||||
mdelay(20);
|
||||
|
||||
/* Wait for response flag (bit 31) in C2PMSG_101 */
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
|
||||
mmMP0_SMN_C2PMSG_101), 0x80000000,
|
||||
0x8000FFFF, false);
|
||||
} else {
|
||||
|
||||
/* Write low address of the ring to C2PMSG_69 */
|
||||
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
|
||||
/* Write high address of the ring to C2PMSG_70 */
|
||||
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
|
||||
/* Write size of ring to C2PMSG_71 */
|
||||
psp_ring_reg = ring->ring_size;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
|
||||
/* Write the ring initialization command to C2PMSG_64 */
|
||||
psp_ring_reg = ring_type;
|
||||
psp_ring_reg = psp_ring_reg << 16;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||
|
||||
/* there might be hardware handshake issue which needs delay */
|
||||
mdelay(20);
|
||||
|
||||
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
|
||||
mmMP0_SMN_C2PMSG_64), 0x80000000,
|
||||
0x8000FFFF, false);
|
||||
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -327,16 +361,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
|
|||
unsigned int psp_ring_reg = 0;
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
|
||||
/* Write the ring destroy command to C2PMSG_64 */
|
||||
psp_ring_reg = 3 << 16;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||
if (psp_v3_1_support_vmr_ring(psp)) {
|
||||
/* Write the Destroy GPCOM ring command to C2PMSG_101 */
|
||||
psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
|
||||
|
||||
/* there might be handshake issue with hardware which needs delay */
|
||||
mdelay(20);
|
||||
/* there might be handshake issue which needs delay */
|
||||
mdelay(20);
|
||||
|
||||
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
||||
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
||||
0x80000000, 0x80000000, false);
|
||||
/* Wait for response flag (bit 31) in C2PMSG_101 */
|
||||
ret = psp_wait_for(psp,
|
||||
SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
|
||||
0x80000000, 0x80000000, false);
|
||||
} else {
|
||||
/* Write the ring destroy command to C2PMSG_64 */
|
||||
psp_ring_reg = 3 << 16;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
|
||||
|
||||
/* there might be handshake issue which needs delay */
|
||||
mdelay(20);
|
||||
|
||||
/* Wait for response flag (bit 31) in C2PMSG_64 */
|
||||
ret = psp_wait_for(psp,
|
||||
SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
|
||||
0x80000000, 0x80000000, false);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -375,7 +424,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
|
|||
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
|
||||
|
||||
/* KM (GPCOM) prepare write pointer */
|
||||
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
|
||||
if (psp_v3_1_support_vmr_ring(psp))
|
||||
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
|
||||
else
|
||||
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
|
||||
|
||||
/* Update KM RB frame pointer to new frame */
|
||||
/* write_frame ptr increments by size of rb_frame in bytes */
|
||||
|
@ -404,7 +456,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
|
|||
|
||||
/* Update the write Pointer in DWORDs */
|
||||
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
|
||||
if (psp_v3_1_support_vmr_ring(psp)) {
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
|
||||
/* send interrupt to PSP for SRIOV ring write pointer update */
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
|
||||
GFX_CTRL_CMD_ID_CONSUME_CMD);
|
||||
} else
|
||||
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -574,6 +632,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
|
||||
{
|
||||
if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static const struct psp_funcs psp_v3_1_funcs = {
|
||||
.init_microcode = psp_v3_1_init_microcode,
|
||||
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
|
||||
|
@ -586,6 +652,7 @@ static const struct psp_funcs psp_v3_1_funcs = {
|
|||
.compare_sram_data = psp_v3_1_compare_sram_data,
|
||||
.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
|
||||
.mode1_reset = psp_v3_1_mode1_reset,
|
||||
.support_vmr_ring = psp_v3_1_support_vmr_ring,
|
||||
};
|
||||
|
||||
void psp_v3_1_set_psp_funcs(struct psp_context *psp)
|
||||
|
|
|
@ -210,12 +210,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
|
|||
{
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_VEGA10:
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_4,
|
||||
ARRAY_SIZE(golden_settings_sdma_4));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_vg10,
|
||||
ARRAY_SIZE(golden_settings_sdma_vg10));
|
||||
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_4,
|
||||
ARRAY_SIZE(golden_settings_sdma_4));
|
||||
soc15_program_register_sequence(adev,
|
||||
golden_settings_sdma_vg10,
|
||||
ARRAY_SIZE(golden_settings_sdma_vg10));
|
||||
}
|
||||
break;
|
||||
case CHIP_VEGA12:
|
||||
soc15_program_register_sequence(adev,
|
||||
|
@ -1521,8 +1523,25 @@ static int sdma_v4_0_late_init(void *handle)
|
|||
}
|
||||
|
||||
/* handle resume path. */
|
||||
if (*ras_if)
|
||||
if (*ras_if) {
|
||||
/* resend ras TA enable cmd during resume.
|
||||
* prepare to handle failure.
|
||||
*/
|
||||
ih_info.head = **ras_if;
|
||||
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||
if (r) {
|
||||
if (r == -EAGAIN) {
|
||||
/* request a gpu reset. will run again. */
|
||||
amdgpu_ras_request_reset_on_boot(adev,
|
||||
AMDGPU_RAS_BLOCK__SDMA);
|
||||
return 0;
|
||||
}
|
||||
/* fail to enable ras, cleanup all. */
|
||||
goto irq;
|
||||
}
|
||||
/* enable successfully. continue. */
|
||||
goto resume;
|
||||
}
|
||||
|
||||
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
|
||||
if (!*ras_if)
|
||||
|
@ -1531,8 +1550,14 @@ static int sdma_v4_0_late_init(void *handle)
|
|||
**ras_if = ras_block;
|
||||
|
||||
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
|
||||
if (r)
|
||||
if (r) {
|
||||
if (r == -EAGAIN) {
|
||||
amdgpu_ras_request_reset_on_boot(adev,
|
||||
AMDGPU_RAS_BLOCK__SDMA);
|
||||
r = 0;
|
||||
}
|
||||
goto feature;
|
||||
}
|
||||
|
||||
ih_info.head = **ras_if;
|
||||
fs_info.head = **ras_if;
|
||||
|
@ -1571,7 +1596,7 @@ static int sdma_v4_0_late_init(void *handle)
|
|||
feature:
|
||||
kfree(*ras_if);
|
||||
*ras_if = NULL;
|
||||
return -EINVAL;
|
||||
return r;
|
||||
}
|
||||
|
||||
static int sdma_v4_0_sw_init(void *handle)
|
||||
|
|
|
@ -1375,6 +1375,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
|||
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||
}
|
||||
|
||||
static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
|
||||
{
|
||||
uint64_t nak_r, nak_g;
|
||||
|
||||
/* Get the number of NAKs received and generated */
|
||||
nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
|
||||
nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
|
||||
|
||||
/* Add the total number of NAKs, i.e the number of replays */
|
||||
return (nak_r + nak_g);
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs si_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &si_read_disabled_bios,
|
||||
|
@ -1393,6 +1405,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
|
|||
.need_full_reset = &si_need_full_reset,
|
||||
.get_pcie_usage = &si_get_pcie_usage,
|
||||
.need_reset_on_init = &si_need_reset_on_init,
|
||||
.get_pcie_replay_count = &si_get_pcie_replay_count,
|
||||
};
|
||||
|
||||
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#include "smuio/smuio_9_0_offset.h"
|
||||
#include "smuio/smuio_9_0_sh_mask.h"
|
||||
#include "nbio/nbio_7_0_default.h"
|
||||
#include "nbio/nbio_7_0_offset.h"
|
||||
#include "nbio/nbio_7_0_sh_mask.h"
|
||||
#include "nbio/nbio_7_0_smn.h"
|
||||
#include "mp/mp_9_0_offset.h"
|
||||
|
@ -64,6 +65,9 @@
|
|||
#include "dce_virtual.h"
|
||||
#include "mxgpu_ai.h"
|
||||
#include "amdgpu_smu.h"
|
||||
#include "amdgpu_ras.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
#include <uapi/linux/kfd_ioctl.h>
|
||||
|
||||
#define mmMP0_MISC_CGTT_CTRL0 0x01b9
|
||||
#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
|
||||
|
@ -230,7 +234,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
|
|||
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
|
||||
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl);
|
||||
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
|
||||
}
|
||||
|
||||
static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
|
||||
|
@ -385,7 +389,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
|
|||
tmp &= ~(entry->and_mask);
|
||||
tmp |= entry->or_mask;
|
||||
}
|
||||
WREG32(reg, tmp);
|
||||
|
||||
if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) ||
|
||||
reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) ||
|
||||
reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) ||
|
||||
reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))
|
||||
WREG32_RLC(reg, tmp);
|
||||
else
|
||||
WREG32(reg, tmp);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -475,6 +487,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
|
|||
soc15_asic_get_baco_capability(adev, &baco_reset);
|
||||
else
|
||||
baco_reset = false;
|
||||
if (baco_reset) {
|
||||
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
|
||||
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (hive || (ras && ras->supported))
|
||||
baco_reset = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
baco_reset = false;
|
||||
|
@ -606,12 +625,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
|
|||
case CHIP_VEGA20:
|
||||
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
|
||||
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
|
||||
if (adev->asic_type == CHIP_VEGA20)
|
||||
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
|
||||
else
|
||||
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
|
||||
|
||||
/* For Vega10 SR-IOV, PSP need to be initialized before IH */
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
|
||||
if (adev->asic_type == CHIP_VEGA20)
|
||||
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
|
||||
else
|
||||
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
|
||||
}
|
||||
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
|
||||
} else {
|
||||
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
|
||||
if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
|
||||
if (adev->asic_type == CHIP_VEGA20)
|
||||
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
|
||||
else
|
||||
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
|
||||
}
|
||||
}
|
||||
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
|
||||
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
|
||||
|
@ -733,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
|
|||
/* Just return false for soc15 GPUs. Reset does not seem to
|
||||
* be necessary.
|
||||
*/
|
||||
return false;
|
||||
if (!amdgpu_passthrough(adev))
|
||||
return false;
|
||||
|
||||
if (adev->flags & AMD_IS_APU)
|
||||
return false;
|
||||
|
@ -748,6 +780,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
|
|||
return false;
|
||||
}
|
||||
|
||||
static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev)
|
||||
{
|
||||
uint64_t nak_r, nak_g;
|
||||
|
||||
/* Get the number of NAKs received and generated */
|
||||
nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK);
|
||||
nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED);
|
||||
|
||||
/* Add the total number of NAKs, i.e the number of replays */
|
||||
return (nak_r + nak_g);
|
||||
}
|
||||
|
||||
static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
||||
{
|
||||
.read_disabled_bios = &soc15_read_disabled_bios,
|
||||
|
@ -765,6 +809,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
|
|||
.init_doorbell_index = &vega10_doorbell_index_init,
|
||||
.get_pcie_usage = &soc15_get_pcie_usage,
|
||||
.need_reset_on_init = &soc15_need_reset_on_init,
|
||||
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
|
||||
};
|
||||
|
||||
static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
||||
|
@ -784,12 +829,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
|
|||
.init_doorbell_index = &vega20_doorbell_index_init,
|
||||
.get_pcie_usage = &soc15_get_pcie_usage,
|
||||
.need_reset_on_init = &soc15_need_reset_on_init,
|
||||
.get_pcie_replay_count = &soc15_get_pcie_replay_count,
|
||||
};
|
||||
|
||||
static int soc15_common_early_init(void *handle)
|
||||
{
|
||||
#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
|
||||
adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
|
||||
adev->smc_rreg = NULL;
|
||||
adev->smc_wreg = NULL;
|
||||
adev->pcie_rreg = &soc15_pcie_rreg;
|
||||
|
@ -998,11 +1047,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
|
|||
int i;
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
adev->nbio_funcs->sdma_doorbell_range(adev, i,
|
||||
ring->use_doorbell, ring->doorbell_index,
|
||||
adev->doorbell_index.sdma_doorbell_range);
|
||||
/* Two reasons to skip
|
||||
* 1, Host driver already programmed them
|
||||
* 2, To avoid registers program violations in SR-IOV
|
||||
*/
|
||||
if (!amdgpu_virt_support_skip_setting(adev)) {
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
adev->nbio_funcs->sdma_doorbell_range(adev, i,
|
||||
ring->use_doorbell, ring->doorbell_index,
|
||||
adev->doorbell_index.sdma_doorbell_range);
|
||||
}
|
||||
}
|
||||
|
||||
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
|
||||
|
@ -1019,6 +1074,12 @@ static int soc15_common_hw_init(void *handle)
|
|||
soc15_program_aspm(adev);
|
||||
/* setup nbio registers */
|
||||
adev->nbio_funcs->init_registers(adev);
|
||||
/* remap HDP registers to a hole in mmio space,
|
||||
* for the purpose of expose those registers
|
||||
* to process space
|
||||
*/
|
||||
if (adev->nbio_funcs->remap_hdp_registers)
|
||||
adev->nbio_funcs->remap_hdp_registers(adev);
|
||||
/* enable the doorbell aperture */
|
||||
soc15_enable_doorbell_aperture(adev, true);
|
||||
/* HW doorbell routing policy: doorbell writing not
|
||||
|
|
|
@ -42,8 +42,18 @@ struct soc15_reg_golden {
|
|||
u32 or_mask;
|
||||
};
|
||||
|
||||
/*
 * Describes one register by the indices used to look up its base in
 * adev->reg_offset (hwip, inst, seg) plus its offset from that base.
 * reg_value holds a value associated with the register.
 * Field order matters: SOC15_REG_ENTRY() supplies the first four
 * initializers positionally.
 */
struct soc15_reg_entry {
	uint32_t hwip;
	uint32_t inst;
	uint32_t seg;
	uint32_t reg_offset;
	uint32_t reg_value;
};

/* Expands to the hwip, inst, seg, reg_offset initializers of a soc15_reg_entry. */
#define SOC15_REG_ENTRY(ip, inst, reg)	ip##_HWIP, inst, reg##_BASE_IDX, reg

/*
 * Absolute register offset of @entry. Requires `adev` in the caller's scope.
 * @entry is parenthesized so that any struct-valued expression (for example
 * a dereferenced pointer) can be passed, not only a plain identifier.
 */
#define SOC15_REG_ENTRY_OFFSET(entry) \
	(adev->reg_offset[(entry).hwip][(entry).inst][(entry).seg] + (entry).reg_offset)
|
||||
|
||||
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
|
||||
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
|
||||
|
||||
|
|
|
@ -69,26 +69,60 @@
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
|
||||
({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
|
||||
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
|
||||
UVD_DPG_LMA_CTL__MASK_EN_MASK | \
|
||||
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
|
||||
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
|
||||
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
|
||||
RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); })
|
||||
|
||||
#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
|
||||
#define WREG32_RLC(reg, value) \
|
||||
do { \
|
||||
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
|
||||
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
|
||||
WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
|
||||
UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
|
||||
((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
|
||||
<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
|
||||
(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
|
||||
if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
|
||||
uint32_t i = 0; \
|
||||
uint32_t retries = 50000; \
|
||||
uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
|
||||
uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \
|
||||
uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \
|
||||
WREG32(r0, value); \
|
||||
WREG32(r1, (reg | 0x80000000)); \
|
||||
WREG32(spare_int, 0x1); \
|
||||
for (i = 0; i < retries; i++) { \
|
||||
u32 tmp = RREG32(r1); \
|
||||
if (!(tmp & 0x80000000)) \
|
||||
break; \
|
||||
udelay(10); \
|
||||
} \
|
||||
if (i >= retries) \
|
||||
pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \
|
||||
} else { \
|
||||
WREG32(reg, value); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
 * Write @value to the register (ip, inst, reg). When
 * amdgpu_virt_support_rlc_prg_reg(adev) is true and the target is
 * GRBM_GFX_CNTL or GRBM_GFX_INDEX, the value is first mirrored into
 * SCRATCH_REG2 or SCRATCH_REG3 respectively. The target register itself
 * is always written last.
 *
 * Requires `adev` in the caller's scope. @value is evaluated exactly once.
 *
 * NOTE(review): the SCRATCH_REG2/3 addresses are built with
 * mmSCRATCH_REG1_BASE_IDX; presumably all scratch registers share one base
 * index -- confirm against the register headers.
 */
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
	do {							\
		uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg; \
		uint32_t rlc_shadow_val = (value);		\
		if (amdgpu_virt_support_rlc_prg_reg(adev)) {	\
			uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
			uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
			uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
			uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \
			if (target_reg == grbm_cntl)		\
				WREG32(r2, rlc_shadow_val);	\
			else if (target_reg == grbm_idx)	\
				WREG32(r3, rlc_shadow_val);	\
		}						\
		WREG32(target_reg, rlc_shadow_val);		\
	} while (0)
|
||||
|
||||
/*
 * Resolve @reg to its absolute offset and write @value through WREG32_RLC().
 * Requires `adev` in the caller's scope.
 * Note: @ip and @inst are accepted but not used; the base is always looked
 * up under GC_HWIP, instance 0.
 */
#define WREG32_SOC15_RLC(ip, inst, reg, value) \
	do {							\
		uint32_t rlc_target = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg; \
		WREG32_RLC(rlc_target, value);			\
	} while (0)
|
||||
|
||||
/*
 * Read-modify-write of one field of mm<reg>: the current register value is
 * read with RREG32(), the field's bits are cleared, @val shifted into the
 * field position is OR-ed in, and the result is written via WREG32_RLC().
 * Requires `adev` in the caller's scope. @val is not masked to the field.
 */
#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
	WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
		   ((RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) & \
		     ~REG_FIELD_MASK(reg, field)) | \
		    ((val) << REG_FIELD_SHIFT(reg, field))))
|
||||
|
||||
/*
 * Write @value through WREG32_RLC() to the register (ip, inst, reg)
 * displaced by @offset. Requires `adev` in the caller's scope.
 * @offset is parenthesized so that an argument containing operators of
 * lower precedence than '+' (for example ?: or |) is added as a whole.
 */
#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
	WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + (offset)), value)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
|
|||
.type = AMDGPU_RING_TYPE_UVD,
|
||||
.align_mask = 0xf,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.get_rptr = uvd_v4_2_ring_get_rptr,
|
||||
.get_wptr = uvd_v4_2_ring_get_wptr,
|
||||
.set_wptr = uvd_v4_2_ring_set_wptr,
|
||||
|
|
|
@ -849,6 +849,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
|
|||
.type = AMDGPU_RING_TYPE_UVD,
|
||||
.align_mask = 0xf,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.get_rptr = uvd_v5_0_ring_get_rptr,
|
||||
.get_wptr = uvd_v5_0_ring_get_wptr,
|
||||
.set_wptr = uvd_v5_0_ring_set_wptr,
|
||||
|
|
|
@ -1502,6 +1502,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
|
|||
.type = AMDGPU_RING_TYPE_UVD,
|
||||
.align_mask = 0xf,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.get_rptr = uvd_v6_0_ring_get_rptr,
|
||||
.get_wptr = uvd_v6_0_ring_get_wptr,
|
||||
.set_wptr = uvd_v6_0_ring_set_wptr,
|
||||
|
@ -1527,6 +1528,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
|
|||
.type = AMDGPU_RING_TYPE_UVD,
|
||||
.align_mask = 0xf,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.get_rptr = uvd_v6_0_ring_get_rptr,
|
||||
.get_wptr = uvd_v6_0_ring_get_wptr,
|
||||
.set_wptr = uvd_v6_0_ring_set_wptr,
|
||||
|
@ -1555,6 +1557,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
|
|||
.align_mask = 0x3f,
|
||||
.nop = HEVC_ENC_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.get_rptr = uvd_v6_0_enc_ring_get_rptr,
|
||||
.get_wptr = uvd_v6_0_enc_ring_get_wptr,
|
||||
.set_wptr = uvd_v6_0_enc_ring_set_wptr,
|
||||
|
|
|
@ -1759,6 +1759,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
|
|||
.type = AMDGPU_RING_TYPE_UVD,
|
||||
.align_mask = 0xf,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.get_rptr = uvd_v7_0_ring_get_rptr,
|
||||
.get_wptr = uvd_v7_0_ring_get_wptr,
|
||||
|
@ -1791,6 +1792,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
|
|||
.align_mask = 0x3f,
|
||||
.nop = HEVC_ENC_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
|
||||
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
|
||||
|
|
|
@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
|
|||
.align_mask = 0xf,
|
||||
.nop = VCE_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.get_rptr = vce_v2_0_ring_get_rptr,
|
||||
.get_wptr = vce_v2_0_ring_get_wptr,
|
||||
.set_wptr = vce_v2_0_ring_set_wptr,
|
||||
|
|
|
@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
|
|||
.align_mask = 0xf,
|
||||
.nop = VCE_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.get_rptr = vce_v3_0_ring_get_rptr,
|
||||
.get_wptr = vce_v3_0_ring_get_wptr,
|
||||
.set_wptr = vce_v3_0_ring_set_wptr,
|
||||
|
@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
|
|||
.align_mask = 0xf,
|
||||
.nop = VCE_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.get_rptr = vce_v3_0_ring_get_rptr,
|
||||
.get_wptr = vce_v3_0_ring_get_wptr,
|
||||
.set_wptr = vce_v3_0_ring_set_wptr,
|
||||
|
|
|
@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
|
|||
.align_mask = 0x3f,
|
||||
.nop = VCE_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.get_rptr = vce_v4_0_ring_get_rptr,
|
||||
.get_wptr = vce_v4_0_ring_get_wptr,
|
||||
|
|
|
@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
|
|||
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
|
||||
static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
|
||||
static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
|
||||
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
struct dpg_pause_state *new_state);
|
||||
|
||||
/**
|
||||
* vcn_v1_0_early_init - set function pointers
|
||||
|
@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
return r;
|
||||
adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
|
|||
return r;
|
||||
}
|
||||
|
||||
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
|
||||
struct dpg_pause_state *new_state)
|
||||
{
|
||||
int ret_code;
|
||||
uint32_t reg_data = 0;
|
||||
uint32_t reg_data2 = 0;
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
/* pause/unpause if state is changed */
|
||||
if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
|
||||
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
||||
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
||||
new_state->fw_based, new_state->jpeg);
|
||||
|
||||
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
||||
(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
|
||||
|
||||
if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
|
||||
ret_code = 0;
|
||||
|
||||
if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
|
||||
if (!ret_code) {
|
||||
/* pause DPG non-jpeg */
|
||||
reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
|
||||
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
|
||||
UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
|
||||
|
||||
/* Restore */
|
||||
ring = &adev->vcn.ring_enc[0];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
|
||||
|
||||
ring = &adev->vcn.ring_enc[1];
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
|
||||
|
||||
ring = &adev->vcn.ring_dec;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
||||
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
}
|
||||
} else {
|
||||
/* unpause dpg non-jpeg, no need to wait */
|
||||
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
}
|
||||
adev->vcn.pause_state.fw_based = new_state->fw_based;
|
||||
}
|
||||
|
||||
/* pause/unpause if state is changed */
|
||||
if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
|
||||
DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
|
||||
adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
|
||||
new_state->fw_based, new_state->jpeg);
|
||||
|
||||
reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
|
||||
(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
|
||||
|
||||
if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
|
||||
ret_code = 0;
|
||||
|
||||
if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
|
||||
if (!ret_code) {
|
||||
/* Make sure JPRG Snoop is disabled before sending the pause */
|
||||
reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
|
||||
reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
|
||||
|
||||
/* pause DPG jpeg */
|
||||
reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
|
||||
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
|
||||
UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
|
||||
|
||||
/* Restore */
|
||||
ring = &adev->vcn.ring_jpeg;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
||||
UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
|
||||
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
|
||||
lower_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
|
||||
upper_32_bits(ring->gpu_addr));
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
|
||||
WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
|
||||
UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
|
||||
|
||||
ring = &adev->vcn.ring_dec;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
|
||||
RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
|
||||
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
|
||||
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
|
||||
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
|
||||
}
|
||||
} else {
|
||||
/* unpause dpg jpeg, no need to wait */
|
||||
reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
|
||||
WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
|
||||
}
|
||||
adev->vcn.pause_state.jpeg = new_state->jpeg;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool vcn_v1_0_is_idle(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
@ -2054,6 +2184,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
|
|||
.type = AMDGPU_RING_TYPE_VCN_DEC,
|
||||
.align_mask = 0xf,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
|
||||
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
|
||||
|
@ -2087,6 +2218,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
|
|||
.align_mask = 0x3f,
|
||||
.nop = VCN_ENC_CMD_NO_OP,
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
|
||||
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
|
||||
|
@ -2118,6 +2250,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
|
|||
.align_mask = 0xf,
|
||||
.nop = PACKET0(0x81ff, 0),
|
||||
.support_64bit_ptrs = false,
|
||||
.no_user_fence = true,
|
||||
.vmhub = AMDGPU_MMHUB,
|
||||
.extra_dw = 64,
|
||||
.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
|
||||
|
|
|
@ -48,14 +48,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
|
|||
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
|
||||
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||
}
|
||||
adev->irq.ih.enabled = true;
|
||||
|
||||
if (adev->irq.ih1.ring_size) {
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
||||
RB_ENABLE, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
}
|
||||
adev->irq.ih1.enabled = true;
|
||||
}
|
||||
|
||||
|
@ -63,7 +78,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
|
|||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
|
||||
RB_ENABLE, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||
}
|
||||
adev->irq.ih2.enabled = true;
|
||||
}
|
||||
}
|
||||
|
@ -81,7 +104,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
|||
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
|
||||
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||
}
|
||||
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
|
||||
|
@ -92,7 +123,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
|||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
|
||||
RB_ENABLE, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
}
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
|
||||
|
@ -104,7 +143,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
|
|||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
|
||||
RB_ENABLE, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||
}
|
||||
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
|
||||
|
@ -187,7 +235,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
|||
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
|
||||
!!adev->irq.msi_enabled);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
|
||||
DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
|
||||
}
|
||||
|
||||
/* set the writeback address whether it's enabled or not */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
|
||||
|
@ -214,7 +270,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
|||
WPTR_OVERFLOW_ENABLE, 0);
|
||||
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
|
||||
RB_FULL_DRAIN_ENABLE, 1);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
|
||||
}
|
||||
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
|
||||
|
@ -232,7 +296,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
|
|||
|
||||
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
|
||||
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||
|
||||
if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
|
||||
if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
|
||||
ih_rb_cntl)) {
|
||||
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
} else {
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
|
||||
}
|
||||
|
||||
/* set rptr, wptr to 0 */
|
||||
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
|
||||
|
|
|
@ -987,6 +987,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
|
|||
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
|
||||
}
|
||||
|
||||
static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev)
|
||||
{
|
||||
uint64_t nak_r, nak_g;
|
||||
|
||||
/* Get the number of NAKs received and generated */
|
||||
nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
|
||||
nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
|
||||
|
||||
/* Add the total number of NAKs, i.e the number of replays */
|
||||
return (nak_r + nak_g);
|
||||
}
|
||||
|
||||
static bool vi_need_reset_on_init(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 clock_cntl, pc;
|
||||
|
@ -1021,6 +1033,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
|
|||
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||
.get_pcie_usage = &vi_get_pcie_usage,
|
||||
.need_reset_on_init = &vi_need_reset_on_init,
|
||||
.get_pcie_replay_count = &vi_get_pcie_replay_count,
|
||||
};
|
||||
|
||||
#define CZ_REV_BRISTOL(rev) \
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
*/
|
||||
|
||||
static const uint32_t cwsr_trap_gfx8_hex[] = {
|
||||
0xbf820001, 0xbf82012b,
|
||||
0xbf820001, 0xbf820121,
|
||||
0xb8f4f802, 0x89748674,
|
||||
0xb8f5f803, 0x8675ff75,
|
||||
0x00000400, 0xbf850017,
|
||||
|
@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
|
|||
0x8671ff71, 0x0000ffff,
|
||||
0x8f728374, 0xb972e0c2,
|
||||
0xbf800002, 0xb9740002,
|
||||
0xbe801f70, 0xb8f5f803,
|
||||
0x8675ff75, 0x00000100,
|
||||
0xbf840006, 0xbefa0080,
|
||||
0xb97a0203, 0x8671ff71,
|
||||
0x0000ffff, 0x80f08870,
|
||||
0x82f18071, 0xbefa0080,
|
||||
0xbe801f70, 0xbefa0080,
|
||||
0xb97a0283, 0xbef60068,
|
||||
0xbef70069, 0xb8fa1c07,
|
||||
0x8e7a9c7a, 0x87717a71,
|
||||
|
@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
|
|||
|
||||
|
||||
static const uint32_t cwsr_trap_gfx9_hex[] = {
|
||||
0xbf820001, 0xbf82015d,
|
||||
0xbf820001, 0xbf82015e,
|
||||
0xb8f8f802, 0x89788678,
|
||||
0xb8f1f803, 0x866eff71,
|
||||
0x00000400, 0xbf850037,
|
||||
0x866eff71, 0x00000800,
|
||||
0xbf850003, 0x866eff71,
|
||||
0x00000100, 0xbf840008,
|
||||
0xb8fbf803, 0x866eff7b,
|
||||
0x00000400, 0xbf85003b,
|
||||
0x866eff7b, 0x00000800,
|
||||
0xbf850003, 0x866eff7b,
|
||||
0x00000100, 0xbf84000c,
|
||||
0x866eff78, 0x00002000,
|
||||
0xbf840001, 0xbf810000,
|
||||
0xbf840005, 0xbf8e0010,
|
||||
0xb8eef803, 0x866eff6e,
|
||||
0x00000400, 0xbf84fffb,
|
||||
0x8778ff78, 0x00002000,
|
||||
0x80ec886c, 0x82ed806d,
|
||||
0xb8eef807, 0x866fff6e,
|
||||
|
@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
|
|||
0x8977ff77, 0xfc000000,
|
||||
0x87776f77, 0x896eff6e,
|
||||
0x001f8000, 0xb96ef807,
|
||||
0xb8f0f812, 0xb8f1f813,
|
||||
0x8ef08870, 0xc0071bb8,
|
||||
0xb8faf812, 0xb8fbf813,
|
||||
0x8efa887a, 0xc0071bbd,
|
||||
0x00000000, 0xbf8cc07f,
|
||||
0xc0071c38, 0x00000008,
|
||||
0xc0071ebd, 0x00000008,
|
||||
0xbf8cc07f, 0x86ee6e6e,
|
||||
0xbf840001, 0xbe801d6e,
|
||||
0xb8f1f803, 0x8671ff71,
|
||||
0xb8fbf803, 0x867bff7b,
|
||||
0x000001ff, 0xbf850002,
|
||||
0x806c846c, 0x826d806d,
|
||||
0x866dff6d, 0x0000ffff,
|
||||
|
@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
|
|||
0x8f6e8378, 0xb96ee0c2,
|
||||
0xbf800002, 0xb9780002,
|
||||
0xbe801f6c, 0x866dff6d,
|
||||
0x0000ffff, 0xbef00080,
|
||||
0xb9700283, 0xb8f02407,
|
||||
0x8e709c70, 0x876d706d,
|
||||
0xb8f003c7, 0x8e709b70,
|
||||
0x876d706d, 0xb8f0f807,
|
||||
0x8670ff70, 0x00007fff,
|
||||
0xb970f807, 0xbeee007e,
|
||||
0x0000ffff, 0xbefa0080,
|
||||
0xb97a0283, 0xb8fa2407,
|
||||
0x8e7a9b7a, 0x876d7a6d,
|
||||
0xb8fa03c7, 0x8e7a9a7a,
|
||||
0x876d7a6d, 0xb8faf807,
|
||||
0x867aff7a, 0x00007fff,
|
||||
0xb97af807, 0xbeee007e,
|
||||
0xbeef007f, 0xbefe0180,
|
||||
0xbf900004, 0x87708478,
|
||||
0xb970f802, 0xbf8e0002,
|
||||
0xbf88fffe, 0xb8f02a05,
|
||||
0xbf900004, 0x877a8478,
|
||||
0xb97af802, 0xbf8e0002,
|
||||
0xbf88fffe, 0xb8fa2a05,
|
||||
0x807a817a, 0x8e7a8a7a,
|
||||
0xb8fb1605, 0x807b817b,
|
||||
0x8e7b867b, 0x807a7b7a,
|
||||
0x807a7e7a, 0x827b807f,
|
||||
0x867bff7b, 0x0000ffff,
|
||||
0xc04b1c3d, 0x00000050,
|
||||
0xbf8cc07f, 0xc04b1d3d,
|
||||
0x00000060, 0xbf8cc07f,
|
||||
0xc0431e7d, 0x00000074,
|
||||
0xbf8cc07f, 0xbef4007e,
|
||||
0x8675ff7f, 0x0000ffff,
|
||||
0x8775ff75, 0x00040000,
|
||||
0xbef60080, 0xbef700ff,
|
||||
0x00807fac, 0x867aff7f,
|
||||
0x08000000, 0x8f7a837a,
|
||||
0x87777a77, 0x867aff7f,
|
||||
0x70000000, 0x8f7a817a,
|
||||
0x87777a77, 0xbef1007c,
|
||||
0xbef00080, 0xb8f02a05,
|
||||
0x80708170, 0x8e708a70,
|
||||
0xb8f11605, 0x80718171,
|
||||
0x8e718671, 0x80707170,
|
||||
0x80707e70, 0x8271807f,
|
||||
0x8671ff71, 0x0000ffff,
|
||||
0xc0471cb8, 0x00000040,
|
||||
0xbf8cc07f, 0xc04b1d38,
|
||||
0x00000048, 0xbf8cc07f,
|
||||
0xc0431e78, 0x00000058,
|
||||
0xbf8cc07f, 0xc0471eb8,
|
||||
0x0000005c, 0xbf8cc07f,
|
||||
0xb8fa1605, 0x807a817a,
|
||||
0x8e7a867a, 0x80707a70,
|
||||
0xbef60084, 0xbef600ff,
|
||||
0x01000000, 0xbefe007c,
|
||||
0xbefc0070, 0xc0611c7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x80708470, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611b3a, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc0070, 0xc0611b7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x80708470, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611bba, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc0070, 0xc0611bfa,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x80708470, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611e3a, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xb8fbf803,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611efa, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc0070, 0xc0611a3a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x80708470, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611a7a, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0xb8f1f801,
|
||||
0xbefe007c, 0xbefc0070,
|
||||
0xc0611c7a, 0x0000007c,
|
||||
0xbf8cc07f, 0x80708470,
|
||||
0xbefc007e, 0x867aff7f,
|
||||
0x04000000, 0xbeef0080,
|
||||
0x876f6f7a, 0xb8f02a05,
|
||||
0x80708170, 0x8e708a70,
|
||||
0xb8fb1605, 0x807b817b,
|
||||
0x8e7b847b, 0x8e76827b,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbef20174, 0x80747074,
|
||||
0x82758075, 0xbefc0080,
|
||||
0xbf800000, 0xbe802b00,
|
||||
0xbe822b02, 0xbe842b04,
|
||||
0xbe862b06, 0xbe882b08,
|
||||
0xbe8a2b0a, 0xbe8c2b0c,
|
||||
0xbe8e2b0e, 0xc06b003a,
|
||||
0x00000000, 0xbf8cc07f,
|
||||
0xc06b013a, 0x00000010,
|
||||
0xbf8cc07f, 0xc06b023a,
|
||||
0x00000020, 0xbf8cc07f,
|
||||
0xc06b033a, 0x00000030,
|
||||
0xbf8cc07f, 0x8074c074,
|
||||
0x82758075, 0x807c907c,
|
||||
0xbf0a7b7c, 0xbf85ffe7,
|
||||
0xbef40172, 0xbef00080,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xbee80080, 0xbee90080,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xe0724000, 0x701d0000,
|
||||
0xe0724100, 0x701d0100,
|
||||
0xe0724200, 0x701d0200,
|
||||
0xe0724300, 0x701d0300,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8fb4306, 0x867bc17b,
|
||||
0xbf84002c, 0xbf8a0000,
|
||||
0x867aff6f, 0x04000000,
|
||||
0xbf840028, 0x8e7b867b,
|
||||
0x8e7b827b, 0xbef6007b,
|
||||
0xb8f02a05, 0x80708170,
|
||||
0x8e708a70, 0xb8fa1605,
|
||||
0x807a817a, 0x8e7a867a,
|
||||
0x80707a70, 0x8070ff70,
|
||||
0x00000080, 0xbef600ff,
|
||||
0x01000000, 0xbefc0080,
|
||||
0xd28c0002, 0x000100c1,
|
||||
0xd28d0003, 0x000204c1,
|
||||
0xd1060002, 0x00011103,
|
||||
0x7e0602ff, 0x00000200,
|
||||
0xbefc00ff, 0x00010000,
|
||||
0xbe800077, 0x8677ff77,
|
||||
0xff7fffff, 0x8777ff77,
|
||||
0x00058000, 0xd8ec0000,
|
||||
0x00000002, 0xbf8cc07f,
|
||||
0xe0765000, 0x701d0002,
|
||||
0x68040702, 0xd0c9006a,
|
||||
0x0000f702, 0xbf87fff7,
|
||||
0xbef70000, 0xbef000ff,
|
||||
0x00000400, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8fb2a05,
|
||||
0x807b817b, 0x8e7b827b,
|
||||
0x8e76887b, 0xbef600ff,
|
||||
0x01000000, 0xbefc0084,
|
||||
0xbf0a7b7c, 0xbf840015,
|
||||
0xbf11017c, 0x807bff7b,
|
||||
0x00001000, 0x7e000300,
|
||||
0x7e020301, 0x7e040302,
|
||||
0x7e060303, 0xe0724000,
|
||||
0x701d0000, 0xe0724100,
|
||||
0x701d0100, 0xe0724200,
|
||||
0x701d0200, 0xe0724300,
|
||||
0x701d0300, 0x807c847c,
|
||||
0x8070ff70, 0x00000400,
|
||||
0xbf0a7b7c, 0xbf85ffef,
|
||||
0xbf9c0000, 0xbf8200da,
|
||||
0xbef4007e, 0x8675ff7f,
|
||||
0x0000ffff, 0x8775ff75,
|
||||
0x00040000, 0xbef60080,
|
||||
0xbef700ff, 0x00807fac,
|
||||
0x8670ff7f, 0x08000000,
|
||||
0x8f708370, 0x87777077,
|
||||
0x8670ff7f, 0x70000000,
|
||||
0x8f708170, 0x87777077,
|
||||
0xbefb007c, 0xbefa0080,
|
||||
0xb8fa2a05, 0x807a817a,
|
||||
0x8e7a8a7a, 0xb8f01605,
|
||||
0x80708170, 0x8e708670,
|
||||
0x807a707a, 0xbef60084,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611efa, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611b3a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611b7a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611bba,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611bfa, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611e3a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xb8f1f803, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611c7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xbefe007c, 0xbefc007a,
|
||||
0xc0611a3a, 0x0000007c,
|
||||
0xbf8cc07f, 0x807a847a,
|
||||
0xbefc007e, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611a7a,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0xb8fbf801, 0xbefe007c,
|
||||
0xbefc007a, 0xc0611efa,
|
||||
0x0000007c, 0xbf8cc07f,
|
||||
0x807a847a, 0xbefc007e,
|
||||
0x8670ff7f, 0x04000000,
|
||||
0xbeef0080, 0x876f6f70,
|
||||
0xb8fa2a05, 0x807a817a,
|
||||
0x8e7a8a7a, 0xb8f11605,
|
||||
0x80718171, 0x8e718471,
|
||||
0x8e768271, 0xbef600ff,
|
||||
0x01000000, 0xbef20174,
|
||||
0x80747a74, 0x82758075,
|
||||
0xbefc0080, 0xbf800000,
|
||||
0xbe802b00, 0xbe822b02,
|
||||
0xbe842b04, 0xbe862b06,
|
||||
0xbe882b08, 0xbe8a2b0a,
|
||||
0xbe8c2b0c, 0xbe8e2b0e,
|
||||
0xc06b003a, 0x00000000,
|
||||
0xbf8cc07f, 0xc06b013a,
|
||||
0x00000010, 0xbf8cc07f,
|
||||
0xc06b023a, 0x00000020,
|
||||
0xbf8cc07f, 0xc06b033a,
|
||||
0x00000030, 0xbf8cc07f,
|
||||
0x8074c074, 0x82758075,
|
||||
0x807c907c, 0xbf0a717c,
|
||||
0xbf85ffe7, 0xbef40172,
|
||||
0xbefa0080, 0xbefe00c1,
|
||||
0xbeff00c1, 0xbee80080,
|
||||
0xbee90080, 0xbef600ff,
|
||||
0x01000000, 0xe0724000,
|
||||
0x7a1d0000, 0xe0724100,
|
||||
0x7a1d0100, 0xe0724200,
|
||||
0x7a1d0200, 0xe0724300,
|
||||
0x7a1d0300, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8f14306,
|
||||
0x8671c171, 0xbf84002c,
|
||||
0xbf8a0000, 0x8670ff6f,
|
||||
0x04000000, 0xbf840028,
|
||||
0x8e718671, 0x8e718271,
|
||||
0xbef60071, 0xb8fa2a05,
|
||||
0x807a817a, 0x8e7a8a7a,
|
||||
0xb8f01605, 0x80708170,
|
||||
0x8e708670, 0x807a707a,
|
||||
0x807aff7a, 0x00000080,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefc0080, 0xd28c0002,
|
||||
0x000100c1, 0xd28d0003,
|
||||
0x000204c1, 0xd1060002,
|
||||
0x00011103, 0x7e0602ff,
|
||||
0x00000200, 0xbefc00ff,
|
||||
0x00010000, 0xbe800077,
|
||||
0x8677ff77, 0xff7fffff,
|
||||
0x8777ff77, 0x00058000,
|
||||
0xd8ec0000, 0x00000002,
|
||||
0xbf8cc07f, 0xe0765000,
|
||||
0x7a1d0002, 0x68040702,
|
||||
0xd0c9006a, 0x0000e302,
|
||||
0xbf87fff7, 0xbef70000,
|
||||
0xbefa00ff, 0x00000400,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8f12a05, 0x80718171,
|
||||
0x8e718271, 0x8e768871,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefc0084, 0xbf0a717c,
|
||||
0xbf840015, 0xbf11017c,
|
||||
0x8071ff71, 0x00001000,
|
||||
0x7e000300, 0x7e020301,
|
||||
0x7e040302, 0x7e060303,
|
||||
0xe0724000, 0x7a1d0000,
|
||||
0xe0724100, 0x7a1d0100,
|
||||
0xe0724200, 0x7a1d0200,
|
||||
0xe0724300, 0x7a1d0300,
|
||||
0x807c847c, 0x807aff7a,
|
||||
0x00000400, 0xbf0a717c,
|
||||
0xbf85ffef, 0xbf9c0000,
|
||||
0xbf8200dc, 0xbef4007e,
|
||||
0x8675ff7f, 0x0000ffff,
|
||||
0x8775ff75, 0x00040000,
|
||||
0xbef60080, 0xbef700ff,
|
||||
0x00807fac, 0x866eff7f,
|
||||
0x08000000, 0x8f6e836e,
|
||||
0x87776e77, 0x866eff7f,
|
||||
0x70000000, 0x8f6e816e,
|
||||
0x87776e77, 0x866eff7f,
|
||||
0x04000000, 0xbf84001e,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8ef4306, 0x866fc16f,
|
||||
0xbf840019, 0x8e6f866f,
|
||||
0x8e6f826f, 0xbef6006f,
|
||||
0xb8f82a05, 0x80788178,
|
||||
0x8e788a78, 0xb8ee1605,
|
||||
0x806e816e, 0x8e6e866e,
|
||||
0x80786e78, 0x8078ff78,
|
||||
0x00000080, 0xbef600ff,
|
||||
0x01000000, 0xbefc0080,
|
||||
0xe0510000, 0x781d0000,
|
||||
0xe0510100, 0x781d0000,
|
||||
0x807cff7c, 0x00000200,
|
||||
0x8078ff78, 0x00000200,
|
||||
0xbf0a6f7c, 0xbf85fff6,
|
||||
0xbef80080, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8ef2a05,
|
||||
0x806f816f, 0x8e6f826f,
|
||||
0x8e76886f, 0xbef600ff,
|
||||
0x01000000, 0xbeee0078,
|
||||
0x8078ff78, 0x00000400,
|
||||
0xbefc0084, 0xbf11087c,
|
||||
0x806fff6f, 0x00008000,
|
||||
0xe0524000, 0x781d0000,
|
||||
0xe0524100, 0x781d0100,
|
||||
0xe0524200, 0x781d0200,
|
||||
0xe0524300, 0x781d0300,
|
||||
0xbf8c0f70, 0x7e000300,
|
||||
0x7e020301, 0x7e040302,
|
||||
0x7e060303, 0x807c847c,
|
||||
0x8078ff78, 0x00000400,
|
||||
0xbf0a6f7c, 0xbf85ffee,
|
||||
0xbf9c0000, 0xe0524000,
|
||||
0x6e1d0000, 0xe0524100,
|
||||
0x6e1d0100, 0xe0524200,
|
||||
0x6e1d0200, 0xe0524300,
|
||||
0x6e1d0300, 0xb8f82a05,
|
||||
0x866eff7f, 0x08000000,
|
||||
0x8f6e836e, 0x87776e77,
|
||||
0x866eff7f, 0x70000000,
|
||||
0x8f6e816e, 0x87776e77,
|
||||
0x866eff7f, 0x04000000,
|
||||
0xbf84001e, 0xbefe00c1,
|
||||
0xbeff00c1, 0xb8ef4306,
|
||||
0x866fc16f, 0xbf840019,
|
||||
0x8e6f866f, 0x8e6f826f,
|
||||
0xbef6006f, 0xb8f82a05,
|
||||
0x80788178, 0x8e788a78,
|
||||
0xb8ee1605, 0x806e816e,
|
||||
0x8e6e866e, 0x80786e78,
|
||||
0x80f8c078, 0xb8ef1605,
|
||||
0x806f816f, 0x8e6f846f,
|
||||
0x8e76826f, 0xbef600ff,
|
||||
0x01000000, 0xbefc006f,
|
||||
0xc031003a, 0x00000078,
|
||||
0x80f8c078, 0xbf8cc07f,
|
||||
0x80fc907c, 0xbf800000,
|
||||
0xbe802d00, 0xbe822d02,
|
||||
0xbe842d04, 0xbe862d06,
|
||||
0xbe882d08, 0xbe8a2d0a,
|
||||
0xbe8c2d0c, 0xbe8e2d0e,
|
||||
0xbf06807c, 0xbf84fff0,
|
||||
0x8078ff78, 0x00000080,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefc0080, 0xe0510000,
|
||||
0x781d0000, 0xe0510100,
|
||||
0x781d0000, 0x807cff7c,
|
||||
0x00000200, 0x8078ff78,
|
||||
0x00000200, 0xbf0a6f7c,
|
||||
0xbf85fff6, 0xbef80080,
|
||||
0xbefe00c1, 0xbeff00c1,
|
||||
0xb8ef2a05, 0x806f816f,
|
||||
0x8e6f826f, 0x8e76886f,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbeee0078, 0x8078ff78,
|
||||
0x00000400, 0xbefc0084,
|
||||
0xbf11087c, 0x806fff6f,
|
||||
0x00008000, 0xe0524000,
|
||||
0x781d0000, 0xe0524100,
|
||||
0x781d0100, 0xe0524200,
|
||||
0x781d0200, 0xe0524300,
|
||||
0x781d0300, 0xbf8c0f70,
|
||||
0x7e000300, 0x7e020301,
|
||||
0x7e040302, 0x7e060303,
|
||||
0x807c847c, 0x8078ff78,
|
||||
0x00000400, 0xbf0a6f7c,
|
||||
0xbf85ffee, 0xbf9c0000,
|
||||
0xe0524000, 0x6e1d0000,
|
||||
0xe0524100, 0x6e1d0100,
|
||||
0xe0524200, 0x6e1d0200,
|
||||
0xe0524300, 0x6e1d0300,
|
||||
0xb8f82a05, 0x80788178,
|
||||
0x8e788a78, 0xb8ee1605,
|
||||
0x806e816e, 0x8e6e866e,
|
||||
0x80786e78, 0xbef60084,
|
||||
0x80786e78, 0x80f8c078,
|
||||
0xb8ef1605, 0x806f816f,
|
||||
0x8e6f846f, 0x8e76826f,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xc0211bfa, 0x00000078,
|
||||
0x80788478, 0xc0211b3a,
|
||||
0xbefc006f, 0xc031003a,
|
||||
0x00000078, 0x80f8c078,
|
||||
0xbf8cc07f, 0x80fc907c,
|
||||
0xbf800000, 0xbe802d00,
|
||||
0xbe822d02, 0xbe842d04,
|
||||
0xbe862d06, 0xbe882d08,
|
||||
0xbe8a2d0a, 0xbe8c2d0c,
|
||||
0xbe8e2d0e, 0xbf06807c,
|
||||
0xbf84fff0, 0xb8f82a05,
|
||||
0x80788178, 0x8e788a78,
|
||||
0xb8ee1605, 0x806e816e,
|
||||
0x8e6e866e, 0x80786e78,
|
||||
0xbef60084, 0xbef600ff,
|
||||
0x01000000, 0xc0211bfa,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211b7a, 0x00000078,
|
||||
0x80788478, 0xc0211eba,
|
||||
0xc0211b3a, 0x00000078,
|
||||
0x80788478, 0xc0211b7a,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211efa, 0x00000078,
|
||||
0x80788478, 0xc0211c3a,
|
||||
0xc0211c3a, 0x00000078,
|
||||
0x80788478, 0xc0211c7a,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211c7a, 0x00000078,
|
||||
0x80788478, 0xc0211a3a,
|
||||
0xc0211eba, 0x00000078,
|
||||
0x80788478, 0xc0211efa,
|
||||
0x00000078, 0x80788478,
|
||||
0xc0211a7a, 0x00000078,
|
||||
0x80788478, 0xc0211cfa,
|
||||
0xc0211a3a, 0x00000078,
|
||||
0x80788478, 0xc0211a7a,
|
||||
0x00000078, 0x80788478,
|
||||
0xbf8cc07f, 0xbefc006f,
|
||||
0xbefe007a, 0xbeff007b,
|
||||
0x866f71ff, 0x000003ff,
|
||||
0xb96f4803, 0x866f71ff,
|
||||
0xfffff800, 0x8f6f8b6f,
|
||||
0xb96fa2c3, 0xb973f801,
|
||||
0xb8ee2a05, 0x806e816e,
|
||||
0x8e6e8a6e, 0xb8ef1605,
|
||||
0x806f816f, 0x8e6f866f,
|
||||
0x806e6f6e, 0x806e746e,
|
||||
0x826f8075, 0x866fff6f,
|
||||
0x0000ffff, 0xc0071cb7,
|
||||
0x00000040, 0xc00b1d37,
|
||||
0x00000048, 0xc0031e77,
|
||||
0x00000058, 0xc0071eb7,
|
||||
0x0000005c, 0xbf8cc07f,
|
||||
0x866fff6d, 0xf0000000,
|
||||
0x8f6f9c6f, 0x8e6f906f,
|
||||
0xbeee0080, 0x876e6f6e,
|
||||
0x866fff6d, 0x08000000,
|
||||
0x8f6f9b6f, 0x8e6f8f6f,
|
||||
0x876e6f6e, 0x866fff70,
|
||||
0x00800000, 0x8f6f976f,
|
||||
0xb96ef807, 0x866dff6d,
|
||||
0x0000ffff, 0x86fe7e7e,
|
||||
0x86ea6a6a, 0x8f6e8370,
|
||||
0xb96ee0c2, 0xbf800002,
|
||||
0xb9700002, 0xbf8a0000,
|
||||
0x95806f6c, 0xbf810000,
|
||||
0xc0211cfa, 0x00000078,
|
||||
0x80788478, 0xbf8cc07f,
|
||||
0xbefc006f, 0xbefe0070,
|
||||
0xbeff0071, 0x866f7bff,
|
||||
0x000003ff, 0xb96f4803,
|
||||
0x866f7bff, 0xfffff800,
|
||||
0x8f6f8b6f, 0xb96fa2c3,
|
||||
0xb973f801, 0xb8ee2a05,
|
||||
0x806e816e, 0x8e6e8a6e,
|
||||
0xb8ef1605, 0x806f816f,
|
||||
0x8e6f866f, 0x806e6f6e,
|
||||
0x806e746e, 0x826f8075,
|
||||
0x866fff6f, 0x0000ffff,
|
||||
0xc00b1c37, 0x00000050,
|
||||
0xc00b1d37, 0x00000060,
|
||||
0xc0031e77, 0x00000074,
|
||||
0xbf8cc07f, 0x866fff6d,
|
||||
0xf8000000, 0x8f6f9b6f,
|
||||
0x8e6f906f, 0xbeee0080,
|
||||
0x876e6f6e, 0x866fff6d,
|
||||
0x04000000, 0x8f6f9a6f,
|
||||
0x8e6f8f6f, 0x876e6f6e,
|
||||
0x866fff7a, 0x00800000,
|
||||
0x8f6f976f, 0xb96ef807,
|
||||
0x866dff6d, 0x0000ffff,
|
||||
0x86fe7e7e, 0x86ea6a6a,
|
||||
0x8f6e837a, 0xb96ee0c2,
|
||||
0xbf800002, 0xb97a0002,
|
||||
0xbf8a0000, 0x95806f6c,
|
||||
0xbf810000, 0x00000000,
|
||||
};
|
||||
|
|
|
@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP
|
|||
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
|
||||
end
|
||||
|
||||
//check whether there is mem_viol
|
||||
s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
|
||||
s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
|
||||
s_cbranch_scc0 L_NO_PC_REWIND
|
||||
|
||||
//if so, need rewind PC assuming GDS operation gets NACKed
|
||||
s_mov_b32 s_save_tmp, 0 //clear mem_viol bit
|
||||
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
|
||||
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
|
||||
s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
|
||||
s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc
|
||||
|
||||
L_NO_PC_REWIND:
|
||||
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
|
||||
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
|
||||
|
||||
|
|
|
@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
|
|||
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
|
||||
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
|
||||
|
||||
var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
|
||||
var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
|
||||
var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
|
||||
var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
|
||||
var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
|
||||
|
||||
var s_save_spi_init_lo = exec_lo
|
||||
var s_save_spi_init_hi = exec_hi
|
||||
|
@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra
|
|||
var s_save_pc_hi = ttmp1
|
||||
var s_save_exec_lo = ttmp2
|
||||
var s_save_exec_hi = ttmp3
|
||||
var s_save_tmp = ttmp4
|
||||
var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
|
||||
var s_save_tmp = ttmp14
|
||||
var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine
|
||||
var s_save_xnack_mask_lo = ttmp6
|
||||
var s_save_xnack_mask_hi = ttmp7
|
||||
var s_save_buf_rsrc0 = ttmp8
|
||||
|
@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9
|
|||
var s_save_buf_rsrc2 = ttmp10
|
||||
var s_save_buf_rsrc3 = ttmp11
|
||||
var s_save_status = ttmp12
|
||||
var s_save_mem_offset = ttmp14
|
||||
var s_save_mem_offset = ttmp4
|
||||
var s_save_alloc_size = s_save_trapsts //conflict
|
||||
var s_save_m0 = ttmp15
|
||||
var s_save_m0 = ttmp5
|
||||
var s_save_ttmps_lo = s_save_tmp //no conflict
|
||||
var s_save_ttmps_hi = s_save_trapsts //no conflict
|
||||
|
||||
|
@ -207,10 +207,10 @@ var s_restore_mode = ttmp7
|
|||
|
||||
var s_restore_pc_lo = ttmp0
|
||||
var s_restore_pc_hi = ttmp1
|
||||
var s_restore_exec_lo = ttmp14
|
||||
var s_restore_exec_hi = ttmp15
|
||||
var s_restore_status = ttmp4
|
||||
var s_restore_trapsts = ttmp5
|
||||
var s_restore_exec_lo = ttmp4
|
||||
var s_restore_exec_hi = ttmp5
|
||||
var s_restore_status = ttmp14
|
||||
var s_restore_trapsts = ttmp15
|
||||
var s_restore_xnack_mask_lo = xnack_mask_lo
|
||||
var s_restore_xnack_mask_hi = xnack_mask_hi
|
||||
var s_restore_buf_rsrc0 = ttmp8
|
||||
|
@ -266,10 +266,16 @@ if (!EMU_RUN_HACK)
|
|||
|
||||
L_HALT_WAVE:
|
||||
// If STATUS.HALT is set then this fault must come from SQC instruction fetch.
|
||||
// We cannot prevent further faults so just terminate the wavefront.
|
||||
// We cannot prevent further faults. Spin wait until context saved.
|
||||
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
|
||||
s_cbranch_scc0 L_NOT_ALREADY_HALTED
|
||||
s_endpgm
|
||||
|
||||
L_WAIT_CTX_SAVE:
|
||||
s_sleep 0x10
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
|
||||
s_cbranch_scc0 L_WAIT_CTX_SAVE
|
||||
|
||||
L_NOT_ALREADY_HALTED:
|
||||
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
|
||||
|
||||
|
@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP:
|
|||
// Read second-level TBA/TMA from first-level TMA and jump if available.
|
||||
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
|
||||
// ttmp12 holds SQ_WAVE_STATUS
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
|
||||
s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
|
||||
s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
|
||||
s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
|
||||
s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
|
||||
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
|
||||
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
|
||||
s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
|
||||
s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
|
||||
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set
|
||||
|
@ -405,7 +411,7 @@ end
|
|||
else
|
||||
end
|
||||
|
||||
// Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
|
||||
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
|
||||
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
|
||||
get_vgpr_size_bytes(s_save_ttmps_lo)
|
||||
get_sgpr_size_bytes(s_save_ttmps_hi)
|
||||
|
@ -413,13 +419,11 @@ end
|
|||
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
|
||||
s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
|
||||
s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
|
||||
s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
|
||||
s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
|
||||
ack_sqc_store_workaround()
|
||||
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
|
||||
s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
|
||||
ack_sqc_store_workaround()
|
||||
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
|
||||
ack_sqc_store_workaround()
|
||||
s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
|
||||
s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
|
||||
ack_sqc_store_workaround()
|
||||
|
||||
/* setup Resource Constants */
|
||||
|
@ -1093,7 +1097,7 @@ end
|
|||
//s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
|
||||
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
|
||||
|
||||
// Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
|
||||
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
|
||||
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
|
||||
get_vgpr_size_bytes(s_restore_ttmps_lo)
|
||||
get_sgpr_size_bytes(s_restore_ttmps_hi)
|
||||
|
@ -1101,10 +1105,9 @@ end
|
|||
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
|
||||
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
|
||||
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
|
||||
s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
|
||||
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
|
||||
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
|
||||
s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
|
||||
s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
|
||||
s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
|
||||
s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
//reuse s_restore_m0 as a temp register
|
||||
|
|
|
@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
|
|||
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
|
||||
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
|
||||
q_properties->type = KFD_QUEUE_TYPE_SDMA;
|
||||
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
|
||||
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
|
||||
else
|
||||
return -ENOTSUPP;
|
||||
|
||||
|
@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
|
|||
struct kfd_process_device *pdd;
|
||||
|
||||
dev = kfd_device_by_id(args->gpu_id);
|
||||
if (dev == NULL)
|
||||
if (!dev)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
|||
if (args->size != kfd_doorbell_process_slice(dev))
|
||||
return -EINVAL;
|
||||
offset = kfd_get_process_doorbells(dev, p);
|
||||
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
|
||||
if (args->size != PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
|
||||
if (!offset)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
|
|||
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
|
||||
args->mmap_offset = offset;
|
||||
|
||||
/* MMIO is mapped through kfd device
|
||||
* Generate a kfd mmap offset
|
||||
*/
|
||||
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
|
||||
args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
|
||||
args->mmap_offset <<= PAGE_SHIFT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_free:
|
||||
|
@ -1551,6 +1567,32 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
|
|||
return err;
|
||||
}
|
||||
|
||||
static int kfd_ioctl_alloc_queue_gws(struct file *filep,
|
||||
struct kfd_process *p, void *data)
|
||||
{
|
||||
int retval;
|
||||
struct kfd_ioctl_alloc_queue_gws_args *args = data;
|
||||
struct kfd_dev *dev;
|
||||
|
||||
if (!hws_gws_support)
|
||||
return -EINVAL;
|
||||
|
||||
dev = kfd_device_by_id(args->gpu_id);
|
||||
if (!dev) {
|
||||
pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
|
||||
mutex_unlock(&p->mutex);
|
||||
|
||||
args->first_gws = 0;
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int kfd_ioctl_get_dmabuf_info(struct file *filep,
|
||||
struct kfd_process *p, void *data)
|
||||
{
|
||||
|
@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
|
|||
AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
|
||||
kfd_ioctl_import_dmabuf, 0),
|
||||
|
||||
AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
|
||||
kfd_ioctl_alloc_queue_gws, 0),
|
||||
};
|
||||
|
||||
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
|
||||
|
@ -1845,6 +1889,39 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
|
|||
return retcode;
|
||||
}
|
||||
|
||||
static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
phys_addr_t address;
|
||||
int ret;
|
||||
|
||||
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
|
||||
|
||||
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
|
||||
VM_DONTDUMP | VM_PFNMAP;
|
||||
|
||||
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
||||
|
||||
pr_debug("Process %d mapping mmio page\n"
|
||||
" target user address == 0x%08llX\n"
|
||||
" physical address == 0x%08llX\n"
|
||||
" vm_flags == 0x%04lX\n"
|
||||
" size == 0x%04lX\n",
|
||||
process->pasid, (unsigned long long) vma->vm_start,
|
||||
address, vma->vm_flags, PAGE_SIZE);
|
||||
|
||||
ret = io_remap_pfn_range(vma,
|
||||
vma->vm_start,
|
||||
address >> PAGE_SHIFT,
|
||||
PAGE_SIZE,
|
||||
vma->vm_page_prot);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
struct kfd_process *process;
|
||||
|
@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
|
|||
if (!dev)
|
||||
return -ENODEV;
|
||||
return kfd_reserved_mem_mmap(dev, process, vma);
|
||||
case KFD_MMAP_TYPE_MMIO:
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
return kfd_mmio_mmap(dev, process, vma);
|
||||
}
|
||||
|
||||
return -EFAULT;
|
||||
|
|
|
@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
|
|||
#define polaris10_cache_info carrizo_cache_info
|
||||
#define polaris11_cache_info carrizo_cache_info
|
||||
#define polaris12_cache_info carrizo_cache_info
|
||||
#define vegam_cache_info carrizo_cache_info
|
||||
/* TODO - check & update Vega10 cache details */
|
||||
#define vega10_cache_info carrizo_cache_info
|
||||
#define raven_cache_info carrizo_cache_info
|
||||
|
@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
|
|||
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
|
||||
props->weight = 20;
|
||||
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
|
||||
props->weight = 15;
|
||||
props->weight = 15 * iolink->num_hops_xgmi;
|
||||
else
|
||||
props->weight = node_distance(id_from, id_to);
|
||||
|
||||
|
@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
|
|||
pcache_info = polaris12_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
|
||||
break;
|
||||
case CHIP_VEGAM:
|
||||
pcache_info = vegam_cache_info;
|
||||
num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
|
@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
|
|||
|
||||
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
|
||||
struct kfd_dev *kdev,
|
||||
struct kfd_dev *peer_kdev,
|
||||
struct crat_subtype_iolink *sub_type_hdr,
|
||||
uint32_t proximity_domain_from,
|
||||
uint32_t proximity_domain_to)
|
||||
|
@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
|
|||
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
|
||||
sub_type_hdr->proximity_domain_from = proximity_domain_from;
|
||||
sub_type_hdr->proximity_domain_to = proximity_domain_to;
|
||||
sub_type_hdr->num_hops_xgmi =
|
||||
amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
|
|||
(char *)sub_type_hdr +
|
||||
sizeof(struct crat_subtype_iolink));
|
||||
ret = kfd_fill_gpu_xgmi_link_to_gpu(
|
||||
&avail_size, kdev,
|
||||
&avail_size, kdev, peer_dev->gpu,
|
||||
(struct crat_subtype_iolink *)sub_type_hdr,
|
||||
proximity_domain, nid);
|
||||
if (ret < 0)
|
||||
|
|
|
@ -274,7 +274,8 @@ struct crat_subtype_iolink {
|
|||
uint32_t minimum_bandwidth_mbs;
|
||||
uint32_t maximum_bandwidth_mbs;
|
||||
uint32_t recommended_transfer_size;
|
||||
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH];
|
||||
uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
|
||||
uint8_t num_hops_xgmi;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
|
|||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
|
|||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
|
|||
.needs_iommu_device = true,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 1,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
#endif
|
||||
|
@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
static const struct kfd_device_info vegam_device_info = {
|
||||
.asic_family = CHIP_VEGAM,
|
||||
.max_pasid_bits = 16,
|
||||
.max_no_of_hqd = 24,
|
||||
.doorbell_size = 4,
|
||||
.ih_ring_entry_size = 4 * sizeof(uint32_t),
|
||||
.event_interrupt_class = &event_interrupt_class_cik,
|
||||
.num_of_watch_points = 4,
|
||||
.mqd_size_aligned = MQD_SIZE_ALIGNED,
|
||||
.supports_cwsr = true,
|
||||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = true,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 2,
|
||||
};
|
||||
|
||||
|
@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = {
|
|||
.needs_iommu_device = false,
|
||||
.needs_pci_atomics = false,
|
||||
.num_sdma_engines = 2,
|
||||
.num_xgmi_sdma_engines = 0,
|
||||
.num_sdma_queues_per_engine = 8,
|
||||
};
|
||||
|
||||
|
@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = {
|
|||
{ 0x6995, &polaris12_device_info }, /* Polaris12 */
|
||||
{ 0x6997, &polaris12_device_info }, /* Polaris12 */
|
||||
{ 0x699F, &polaris12_device_info }, /* Polaris12 */
|
||||
{ 0x694C, &vegam_device_info }, /* VegaM */
|
||||
{ 0x694E, &vegam_device_info }, /* VegaM */
|
||||
{ 0x694F, &vegam_device_info }, /* VegaM */
|
||||
{ 0x6860, &vega10_device_info }, /* Vega10 */
|
||||
{ 0x6861, &vega10_device_info }, /* Vega10 */
|
||||
{ 0x6862, &vega10_device_info }, /* Vega10 */
|
||||
|
@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
|||
} else
|
||||
kfd->max_proc_per_quantum = hws_max_conc_proc;
|
||||
|
||||
/* Allocate global GWS that is shared by all KFD processes */
|
||||
if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
|
||||
amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
|
||||
dev_err(kfd_device, "Could not allocate %d gws\n",
|
||||
amdgpu_amdkfd_get_num_gws(kfd->kgd));
|
||||
goto out;
|
||||
}
|
||||
/* calculate max size of mqds needed for queues */
|
||||
size = max_num_of_queues_per_device *
|
||||
kfd->device_info->mqd_size_aligned;
|
||||
|
@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
|||
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
|
||||
false)) {
|
||||
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
|
||||
goto out;
|
||||
goto alloc_gtt_mem_failure;
|
||||
}
|
||||
|
||||
dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
|
||||
|
@ -611,6 +653,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
|
|||
kfd_gtt_sa_fini(kfd);
|
||||
kfd_gtt_sa_init_error:
|
||||
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
||||
alloc_gtt_mem_failure:
|
||||
if (hws_gws_support)
|
||||
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
|
||||
dev_err(kfd_device,
|
||||
"device %x:%x NOT added due to errors\n",
|
||||
kfd->pdev->vendor, kfd->pdev->device);
|
||||
|
@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
|
|||
kfd_doorbell_fini(kfd);
|
||||
kfd_gtt_sa_fini(kfd);
|
||||
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
|
||||
if (hws_gws_support)
|
||||
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
|
||||
}
|
||||
|
||||
kfree(kfd);
|
||||
|
|
|
@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
|||
struct qcm_process_device *qpd);
|
||||
|
||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_queue_id);
|
||||
struct queue *q);
|
||||
|
||||
static void kfd_process_hw_exception(struct work_struct *work);
|
||||
|
||||
static inline
|
||||
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
|
||||
{
|
||||
if (type == KFD_QUEUE_TYPE_SDMA)
|
||||
if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
return KFD_MQD_TYPE_SDMA;
|
||||
return KFD_MQD_TYPE_CP;
|
||||
}
|
||||
|
@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
|
|||
return dqm->dev->device_info->num_sdma_engines;
|
||||
}
|
||||
|
||||
static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
|
||||
{
|
||||
return dqm->dev->device_info->num_xgmi_sdma_engines;
|
||||
}
|
||||
|
||||
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
|
||||
{
|
||||
return dqm->dev->device_info->num_sdma_engines
|
||||
* dqm->dev->device_info->num_sdma_queues_per_engine;
|
||||
}
|
||||
|
||||
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
|
||||
{
|
||||
return dqm->dev->device_info->num_xgmi_sdma_engines
|
||||
* dqm->dev->device_info->num_sdma_queues_per_engine;
|
||||
}
|
||||
|
||||
void program_sh_mem_settings(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
|
@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
|
|||
* preserve the user mode ABI.
|
||||
*/
|
||||
q->doorbell_id = q->properties.queue_id;
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
/* For SDMA queues on SOC15 with 8-byte doorbell, use static
|
||||
* doorbell assignments based on the engine and queue id.
|
||||
* The doorbell index distance between RLC (2*i) and (2*i+1)
|
||||
|
@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
|
|||
struct kfd_dev *dev = qpd->dqm->dev;
|
||||
|
||||
if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
return;
|
||||
|
||||
old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
|
||||
|
@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
|||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
|
||||
retval = create_compute_queue_nocpsch(dqm, q, qpd);
|
||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
retval = create_sdma_queue_nocpsch(dqm, q, qpd);
|
||||
else
|
||||
retval = -EINVAL;
|
||||
|
@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
|
|||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
dqm->sdma_queue_count++;
|
||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
dqm->xgmi_sdma_queue_count++;
|
||||
|
||||
/*
|
||||
* Unconditionally increment this counter, regardless of the queue's
|
||||
|
@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
|
|||
struct mqd_manager *mqd_mgr;
|
||||
int retval;
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
||||
if (!mqd_mgr)
|
||||
return -ENOMEM;
|
||||
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
|
||||
|
||||
retval = allocate_hqd(dqm, q);
|
||||
if (retval)
|
||||
|
@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
|
|||
int retval;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr)
|
||||
return -ENOMEM;
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
|
||||
deallocate_hqd(dqm, q);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
dqm->sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
dqm->xgmi_sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
} else {
|
||||
pr_debug("q->properties.type %d is invalid\n",
|
||||
q->properties.type);
|
||||
|
@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
|||
retval = -ENODEV;
|
||||
goto out_unlock;
|
||||
}
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
/*
|
||||
* Eviction state logic: we only mark active queues as evicted
|
||||
* to avoid the overhead of restoring inactive queues later
|
||||
|
@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
|||
}
|
||||
} else if (prev_active &&
|
||||
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
|
||||
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
||||
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
|
||||
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
|
||||
|
@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
|||
retval = map_queues_cpsch(dqm);
|
||||
else if (q->properties.is_active &&
|
||||
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
|
||||
if (WARN(q->process->mm != current->mm,
|
||||
"should only run in user thread"))
|
||||
retval = -EFAULT;
|
||||
|
@ -571,27 +584,6 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
|
|||
return retval;
|
||||
}
|
||||
|
||||
static struct mqd_manager *get_mqd_manager(
|
||||
struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
|
||||
{
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
|
||||
return NULL;
|
||||
|
||||
pr_debug("mqd type %d\n", type);
|
||||
|
||||
mqd_mgr = dqm->mqd_mgrs[type];
|
||||
if (!mqd_mgr) {
|
||||
mqd_mgr = mqd_manager_init(type, dqm->dev);
|
||||
if (!mqd_mgr)
|
||||
pr_err("mqd manager is NULL");
|
||||
dqm->mqd_mgrs[type] = mqd_mgr;
|
||||
}
|
||||
|
||||
return mqd_mgr;
|
||||
}
|
||||
|
||||
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd)
|
||||
{
|
||||
|
@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
|
|||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (!q->properties.is_active)
|
||||
continue;
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) { /* should not be here */
|
||||
pr_err("Cannot evict queue, mqd mgr is NULL\n");
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
q->properties.is_evicted = true;
|
||||
q->properties.is_active = false;
|
||||
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
|
||||
|
@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
|
|||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (!q->properties.is_evicted)
|
||||
continue;
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) { /* should not be here */
|
||||
pr_err("Cannot restore queue, mqd mgr is NULL\n");
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
q->properties.is_evicted = false;
|
||||
q->properties.is_active = true;
|
||||
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
|
||||
|
@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,
|
|||
retval = dqm->asic_ops.update_qpd(dqm, qpd);
|
||||
|
||||
dqm->processes_count++;
|
||||
kfd_inc_compute_active(dqm->dev);
|
||||
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Outside the DQM lock because under the DQM lock we can't do
|
||||
* reclaim or take other locks that others hold while reclaiming.
|
||||
*/
|
||||
kfd_inc_compute_active(dqm->dev);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,
|
|||
list_del(&cur->list);
|
||||
kfree(cur);
|
||||
dqm->processes_count--;
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,
|
|||
retval = 1;
|
||||
out:
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Outside the DQM lock because under the DQM lock we can't do
|
||||
* reclaim or take other locks that others hold while reclaiming.
|
||||
*/
|
||||
if (!retval)
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
|||
INIT_LIST_HEAD(&dqm->queues);
|
||||
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
|
||||
dqm->sdma_queue_count = 0;
|
||||
dqm->xgmi_sdma_queue_count = 0;
|
||||
|
||||
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
|
||||
int pipe_offset = pipe * get_queues_per_pipe(dqm);
|
||||
|
@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
|
|||
}
|
||||
|
||||
dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
|
||||
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
|
|||
}
|
||||
|
||||
static int allocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int *sdma_queue_id)
|
||||
struct queue *q)
|
||||
{
|
||||
int bit;
|
||||
|
||||
if (dqm->sdma_bitmap == 0)
|
||||
return -ENOMEM;
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
if (dqm->sdma_bitmap == 0)
|
||||
return -ENOMEM;
|
||||
bit = __ffs64(dqm->sdma_bitmap);
|
||||
dqm->sdma_bitmap &= ~(1ULL << bit);
|
||||
q->sdma_id = bit;
|
||||
q->properties.sdma_engine_id = q->sdma_id %
|
||||
get_num_sdma_engines(dqm);
|
||||
q->properties.sdma_queue_id = q->sdma_id /
|
||||
get_num_sdma_engines(dqm);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
if (dqm->xgmi_sdma_bitmap == 0)
|
||||
return -ENOMEM;
|
||||
bit = __ffs64(dqm->xgmi_sdma_bitmap);
|
||||
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
|
||||
q->sdma_id = bit;
|
||||
/* sdma_engine_id is sdma id including
|
||||
* both PCIe-optimized SDMAs and XGMI-
|
||||
* optimized SDMAs. The calculation below
|
||||
* assumes the first N engines are always
|
||||
* PCIe-optimized ones
|
||||
*/
|
||||
q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
|
||||
q->sdma_id % get_num_xgmi_sdma_engines(dqm);
|
||||
q->properties.sdma_queue_id = q->sdma_id /
|
||||
get_num_xgmi_sdma_engines(dqm);
|
||||
}
|
||||
|
||||
bit = ffs(dqm->sdma_bitmap) - 1;
|
||||
dqm->sdma_bitmap &= ~(1 << bit);
|
||||
*sdma_queue_id = bit;
|
||||
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
|
||||
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_queue_id)
|
||||
struct queue *q)
|
||||
{
|
||||
if (sdma_queue_id >= get_num_sdma_queues(dqm))
|
||||
return;
|
||||
dqm->sdma_bitmap |= (1 << sdma_queue_id);
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
if (q->sdma_id >= get_num_sdma_queues(dqm))
|
||||
return;
|
||||
dqm->sdma_bitmap |= (1ULL << q->sdma_id);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
|
||||
return;
|
||||
dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
|
||||
}
|
||||
}
|
||||
|
||||
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
||||
|
@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
|||
struct mqd_manager *mqd_mgr;
|
||||
int retval;
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
|
||||
if (!mqd_mgr)
|
||||
return -ENOMEM;
|
||||
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
|
||||
|
||||
retval = allocate_sdma_queue(dqm, &q->sdma_id);
|
||||
retval = allocate_sdma_queue(dqm, q);
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
|
||||
q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
|
||||
|
||||
retval = allocate_doorbell(qpd, q);
|
||||
if (retval)
|
||||
goto out_deallocate_sdma_queue;
|
||||
|
||||
pr_debug("SDMA id is: %d\n", q->sdma_id);
|
||||
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
|
||||
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
|
||||
|
||||
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
||||
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
||||
&q->gart_mqd_addr, &q->properties);
|
||||
|
@ -987,7 +1002,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
|
|||
out_deallocate_doorbell:
|
||||
deallocate_doorbell(qpd, q);
|
||||
out_deallocate_sdma_queue:
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
|
|||
INIT_LIST_HEAD(&dqm->queues);
|
||||
dqm->queue_count = dqm->processes_count = 0;
|
||||
dqm->sdma_queue_count = 0;
|
||||
dqm->xgmi_sdma_queue_count = 0;
|
||||
dqm->active_runlist = false;
|
||||
dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
|
||||
dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
|
||||
|
||||
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
|
||||
|
||||
|
@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
|||
int retval;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
retval = 0;
|
||||
|
||||
dqm_lock(dqm);
|
||||
|
||||
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
|
||||
pr_warn("Can't create new usermode queue because %d queues were already created\n",
|
||||
dqm->total_queue_count);
|
||||
retval = -EPERM;
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
retval = allocate_sdma_queue(dqm, &q->sdma_id);
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
retval = allocate_sdma_queue(dqm, q);
|
||||
if (retval)
|
||||
goto out_unlock;
|
||||
q->properties.sdma_queue_id =
|
||||
q->sdma_id / get_num_sdma_engines(dqm);
|
||||
q->properties.sdma_engine_id =
|
||||
q->sdma_id % get_num_sdma_engines(dqm);
|
||||
goto out;
|
||||
}
|
||||
|
||||
retval = allocate_doorbell(qpd, q);
|
||||
if (retval)
|
||||
goto out_deallocate_sdma_queue;
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto out_deallocate_doorbell;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
/*
|
||||
* Eviction state logic: we only mark active queues as evicted
|
||||
* to avoid the overhead of restoring inactive queues later
|
||||
|
@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
|||
q->properties.is_evicted = (q->properties.queue_size > 0 &&
|
||||
q->properties.queue_percent > 0 &&
|
||||
q->properties.queue_address != 0);
|
||||
|
||||
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
|
||||
|
||||
q->properties.tba_addr = qpd->tba_addr;
|
||||
q->properties.tma_addr = qpd->tma_addr;
|
||||
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
|
||||
|
@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
|||
if (retval)
|
||||
goto out_deallocate_doorbell;
|
||||
|
||||
dqm_lock(dqm);
|
||||
|
||||
list_add(&q->list, &qpd->queues_list);
|
||||
qpd->queue_count++;
|
||||
if (q->properties.is_active) {
|
||||
|
@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
|||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
dqm->sdma_queue_count++;
|
||||
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
dqm->xgmi_sdma_queue_count++;
|
||||
/*
|
||||
* Unconditionally increment this counter, regardless of the queue's
|
||||
* type or whether the queue is active.
|
||||
|
@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
|
|||
out_deallocate_doorbell:
|
||||
deallocate_doorbell(qpd, q);
|
||||
out_deallocate_sdma_queue:
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
out_unlock:
|
||||
dqm_unlock(dqm);
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
|
||||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
out:
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int unmap_sdma_queues(struct device_queue_manager *dqm,
|
||||
unsigned int sdma_engine)
|
||||
static int unmap_sdma_queues(struct device_queue_manager *dqm)
|
||||
{
|
||||
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
|
||||
sdma_engine);
|
||||
int i, retval = 0;
|
||||
|
||||
for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
|
||||
dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
|
||||
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
|
||||
if (retval)
|
||||
return retval;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* dqm->lock mutex has to be locked before calling this function */
|
||||
|
@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
|
|||
if (!dqm->active_runlist)
|
||||
return retval;
|
||||
|
||||
pr_debug("Before destroying queues, sdma queue count is : %u\n",
|
||||
dqm->sdma_queue_count);
|
||||
pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
|
||||
dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
|
||||
|
||||
if (dqm->sdma_queue_count > 0) {
|
||||
unmap_sdma_queues(dqm, 0);
|
||||
unmap_sdma_queues(dqm, 1);
|
||||
}
|
||||
if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
|
||||
unmap_sdma_queues(dqm);
|
||||
|
||||
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
|
||||
filter, filter_param, false, 0);
|
||||
|
@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
|||
|
||||
}
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto failed;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
|
||||
deallocate_doorbell(qpd, q);
|
||||
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
dqm->sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
dqm->xgmi_sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
}
|
||||
|
||||
list_del(&q->list);
|
||||
|
@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
|||
qpd->reset_wavefronts = true;
|
||||
}
|
||||
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
|
||||
/*
|
||||
* Unconditionally decrement this counter, regardless of the queue's
|
||||
* type
|
||||
|
@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
|
|||
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
|
||||
return retval;
|
||||
|
||||
failed:
|
||||
failed_try_destroy_debugged_queue:
|
||||
|
||||
dqm_unlock(dqm);
|
||||
|
@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
|||
struct queue *q, *next;
|
||||
struct device_process_node *cur, *next_dpn;
|
||||
int retval = 0;
|
||||
bool found = false;
|
||||
|
||||
dqm_lock(dqm);
|
||||
|
||||
|
@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
|
|||
list_del(&cur->list);
|
||||
kfree(cur);
|
||||
dqm->processes_count--;
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Outside the DQM lock because under the DQM lock we can't do
|
||||
* reclaim or take other locks that others hold while reclaiming.
|
||||
*/
|
||||
if (found)
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
|
|||
goto dqm_unlock;
|
||||
}
|
||||
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
|
||||
if (!mqd_mgr) {
|
||||
r = -ENOMEM;
|
||||
goto dqm_unlock;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
|
||||
|
||||
if (!mqd_mgr->get_wave_state) {
|
||||
r = -EINVAL;
|
||||
|
@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
|||
struct device_process_node *cur, *next_dpn;
|
||||
enum kfd_unmap_queues_filter filter =
|
||||
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
|
||||
bool found = false;
|
||||
|
||||
retval = 0;
|
||||
|
||||
|
@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
|||
list_for_each_entry(q, &qpd->queues_list, list) {
|
||||
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
|
||||
dqm->sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q->sdma_id);
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
|
||||
dqm->xgmi_sdma_queue_count--;
|
||||
deallocate_sdma_queue(dqm, q);
|
||||
}
|
||||
|
||||
if (q->properties.is_active)
|
||||
|
@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
|||
list_del(&cur->list);
|
||||
kfree(cur);
|
||||
dqm->processes_count--;
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
|
|||
qpd->reset_wavefronts = false;
|
||||
}
|
||||
|
||||
/* lastly, free mqd resources */
|
||||
dqm_unlock(dqm);
|
||||
|
||||
/* Outside the DQM lock because under the DQM lock we can't do
|
||||
* reclaim or take other locks that others hold while reclaiming.
|
||||
*/
|
||||
if (found)
|
||||
kfd_dec_compute_active(dqm->dev);
|
||||
|
||||
/* Lastly, free mqd resources.
|
||||
* Do uninit_mqd() after dqm_unlock to avoid circular locking.
|
||||
*/
|
||||
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
|
||||
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
|
||||
get_mqd_type_from_queue_type(q->properties.type));
|
||||
if (!mqd_mgr) {
|
||||
retval = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
|
||||
q->properties.type)];
|
||||
list_del(&q->list);
|
||||
qpd->queue_count--;
|
||||
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
|
||||
}
|
||||
|
||||
out:
|
||||
dqm_unlock(dqm);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static int init_mqd_managers(struct device_queue_manager *dqm)
|
||||
{
|
||||
int i, j;
|
||||
struct mqd_manager *mqd_mgr;
|
||||
|
||||
for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
|
||||
mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
|
||||
if (!mqd_mgr) {
|
||||
pr_err("mqd manager [%d] initialization failed\n", i);
|
||||
goto out_free;
|
||||
}
|
||||
dqm->mqd_mgrs[i] = mqd_mgr;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_free:
|
||||
for (j = 0; j < i; j++) {
|
||||
kfree(dqm->mqd_mgrs[j]);
|
||||
dqm->mqd_mgrs[j] = NULL;
|
||||
}
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
|
||||
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
|
||||
{
|
||||
int retval;
|
||||
struct kfd_dev *dev = dqm->dev;
|
||||
struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
|
||||
uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
|
||||
dev->device_info->num_sdma_engines *
|
||||
dev->device_info->num_sdma_queues_per_engine +
|
||||
dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
|
||||
|
||||
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
|
||||
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
|
||||
(void *)&(mem_obj->cpu_ptr), true);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
dqm->ops.stop = stop_cpsch;
|
||||
dqm->ops.destroy_queue = destroy_queue_cpsch;
|
||||
dqm->ops.update_queue = update_queue;
|
||||
dqm->ops.get_mqd_manager = get_mqd_manager;
|
||||
dqm->ops.register_process = register_process;
|
||||
dqm->ops.unregister_process = unregister_process;
|
||||
dqm->ops.uninitialize = uninitialize;
|
||||
|
@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
dqm->ops.create_queue = create_queue_nocpsch;
|
||||
dqm->ops.destroy_queue = destroy_queue_nocpsch;
|
||||
dqm->ops.update_queue = update_queue;
|
||||
dqm->ops.get_mqd_manager = get_mqd_manager;
|
||||
dqm->ops.register_process = register_process;
|
||||
dqm->ops.unregister_process = unregister_process;
|
||||
dqm->ops.initialize = initialize_nocpsch;
|
||||
|
@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
|
||||
break;
|
||||
|
||||
|
@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
goto out_free;
|
||||
}
|
||||
|
||||
if (init_mqd_managers(dqm))
|
||||
goto out_free;
|
||||
|
||||
if (allocate_hiq_sdma_mqd(dqm)) {
|
||||
pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (!dqm->ops.initialize(dqm))
|
||||
return dqm;
|
||||
|
||||
|
@ -1772,9 +1843,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
|
||||
{
|
||||
WARN(!mqd, "No hiq sdma mqd trunk to free");
|
||||
|
||||
amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
|
||||
}
|
||||
|
||||
void device_queue_manager_uninit(struct device_queue_manager *dqm)
|
||||
{
|
||||
dqm->ops.uninitialize(dqm);
|
||||
deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
|
||||
kfree(dqm);
|
||||
}
|
||||
|
||||
|
|
|
@ -48,8 +48,6 @@ struct device_process_node {
|
|||
*
|
||||
* @update_queue: Queue update routine.
|
||||
*
|
||||
* @get_mqd_manager: Returns the mqd manager according to the mqd type.
|
||||
*
|
||||
* @execute_queues: Dispatches the queues list to the H/W.
|
||||
*
|
||||
* @register_process: This routine associates a specific process with device.
|
||||
|
@ -97,10 +95,6 @@ struct device_queue_manager_ops {
|
|||
int (*update_queue)(struct device_queue_manager *dqm,
|
||||
struct queue *q);
|
||||
|
||||
struct mqd_manager * (*get_mqd_manager)
|
||||
(struct device_queue_manager *dqm,
|
||||
enum KFD_MQD_TYPE type);
|
||||
|
||||
int (*register_process)(struct device_queue_manager *dqm,
|
||||
struct qcm_process_device *qpd);
|
||||
|
||||
|
@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops {
|
|||
void (*init_sdma_vm)(struct device_queue_manager *dqm,
|
||||
struct queue *q,
|
||||
struct qcm_process_device *qpd);
|
||||
struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -185,10 +181,12 @@ struct device_queue_manager {
|
|||
unsigned int processes_count;
|
||||
unsigned int queue_count;
|
||||
unsigned int sdma_queue_count;
|
||||
unsigned int xgmi_sdma_queue_count;
|
||||
unsigned int total_queue_count;
|
||||
unsigned int next_pipe_to_allocate;
|
||||
unsigned int *allocated_queues;
|
||||
unsigned int sdma_bitmap;
|
||||
uint64_t sdma_bitmap;
|
||||
uint64_t xgmi_sdma_bitmap;
|
||||
unsigned int vmid_bitmap;
|
||||
uint64_t pipelines_addr;
|
||||
struct kfd_mem_obj *pipeline_mem;
|
||||
|
@ -201,6 +199,7 @@ struct device_queue_manager {
|
|||
/* hw exception */
|
||||
bool is_hws_hang;
|
||||
struct work_struct hw_exception_work;
|
||||
struct kfd_mem_obj hiq_sdma_mqd;
|
||||
};
|
||||
|
||||
void device_queue_manager_init_cik(
|
||||
|
@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
|
|||
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
|
||||
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
|
||||
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
|
||||
unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
|
||||
|
||||
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
|
||||
{
|
||||
|
|
|
@ -48,6 +48,7 @@ void device_queue_manager_init_cik(
|
|||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
||||
asic_ops->update_qpd = update_qpd_cik;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_cik;
|
||||
}
|
||||
|
||||
void device_queue_manager_init_cik_hawaii(
|
||||
|
@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii(
|
|||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
|
||||
asic_ops->update_qpd = update_qpd_cik_hawaii;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
||||
|
|
|
@ -37,6 +37,7 @@ void device_queue_manager_init_v9(
|
|||
{
|
||||
asic_ops->update_qpd = update_qpd_v9;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm_v9;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_v9;
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
|
||||
|
|
|
@ -54,6 +54,7 @@ void device_queue_manager_init_vi(
|
|||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
|
||||
asic_ops->update_qpd = update_qpd_vi;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_vi;
|
||||
}
|
||||
|
||||
void device_queue_manager_init_vi_tonga(
|
||||
|
@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga(
|
|||
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
|
||||
asic_ops->update_qpd = update_qpd_vi_tonga;
|
||||
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
|
||||
asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
|
||||
}
|
||||
|
||||
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
|
||||
|
|
|
@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
|
|||
return; /* Presumably process exited. */
|
||||
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
|
||||
memory_exception_data.gpu_id = dev->id;
|
||||
memory_exception_data.failure.imprecise = 1;
|
||||
memory_exception_data.failure.imprecise = true;
|
||||
/* Set failure reason */
|
||||
if (info) {
|
||||
memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
|
||||
|
|
|
@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process)
|
|||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
kfd_init_apertures_vi(pdd, id);
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
|
@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
|
|||
kq->nop_packet = nop.u32all;
|
||||
switch (type) {
|
||||
case KFD_QUEUE_TYPE_DIQ:
|
||||
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_HIQ:
|
||||
kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm,
|
||||
KFD_MQD_TYPE_HIQ);
|
||||
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
|
||||
break;
|
||||
default:
|
||||
pr_err("Invalid queue type %d\n", type);
|
||||
|
@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
|
|||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
kernel_queue_init_vi(&kq->ops_asic_specific);
|
||||
break;
|
||||
|
||||
|
|
|
@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
|||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
|
||||
sizeof(struct pm4_mes_map_queues));
|
||||
packet->bitfields2.alloc_format =
|
||||
alloc_format__mes_map_queues__one_per_pipe_vi;
|
||||
packet->bitfields2.num_queues = 1;
|
||||
packet->bitfields2.queue_sel =
|
||||
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
|
||||
|
||||
packet->bitfields2.engine_sel =
|
||||
engine_sel__mes_map_queues__compute_vi;
|
||||
packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
|
||||
packet->bitfields2.queue_type =
|
||||
queue_type__mes_map_queues__normal_compute_vi;
|
||||
|
||||
|
@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
|||
queue_type__mes_map_queues__debug_interface_queue_vi;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
|
||||
engine_sel__mes_map_queues__sdma0_vi;
|
||||
use_static = false; /* no static queues under SDMA */
|
||||
|
@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
|||
engine_sel__mes_unmap_queues__compute;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel =
|
||||
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
|
||||
break;
|
||||
|
|
|
@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
|
|||
|
||||
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
|
||||
sizeof(struct pm4_mes_map_queues));
|
||||
packet->bitfields2.alloc_format =
|
||||
alloc_format__mes_map_queues__one_per_pipe_vi;
|
||||
packet->bitfields2.num_queues = 1;
|
||||
packet->bitfields2.queue_sel =
|
||||
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
|
||||
|
@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
|
|||
queue_type__mes_map_queues__debug_interface_queue_vi;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
|
||||
engine_sel__mes_map_queues__sdma0_vi;
|
||||
use_static = false; /* no static queues under SDMA */
|
||||
|
@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
|
|||
engine_sel__mes_unmap_queues__compute;
|
||||
break;
|
||||
case KFD_QUEUE_TYPE_SDMA:
|
||||
case KFD_QUEUE_TYPE_SDMA_XGMI:
|
||||
packet->bitfields2.engine_sel =
|
||||
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
|
||||
break;
|
||||
|
|
|
@ -23,34 +23,54 @@
|
|||
|
||||
#include "kfd_mqd_manager.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
#include "kfd_device_queue_manager.h"
|
||||
|
||||
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev)
|
||||
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev)
|
||||
{
|
||||
switch (dev->device_info->asic_family) {
|
||||
case CHIP_KAVERI:
|
||||
return mqd_manager_init_cik(type, dev);
|
||||
case CHIP_HAWAII:
|
||||
return mqd_manager_init_cik_hawaii(type, dev);
|
||||
case CHIP_CARRIZO:
|
||||
return mqd_manager_init_vi(type, dev);
|
||||
case CHIP_TONGA:
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
return mqd_manager_init_vi_tonga(type, dev);
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_RAVEN:
|
||||
return mqd_manager_init_v9(type, dev);
|
||||
default:
|
||||
WARN(1, "Unexpected ASIC family %u",
|
||||
dev->device_info->asic_family);
|
||||
}
|
||||
struct kfd_mem_obj *mqd_mem_obj = NULL;
|
||||
|
||||
return NULL;
|
||||
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||
if (!mqd_mem_obj)
|
||||
return NULL;
|
||||
|
||||
mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
|
||||
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
|
||||
mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
|
||||
|
||||
return mqd_mem_obj;
|
||||
}
|
||||
|
||||
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct kfd_mem_obj *mqd_mem_obj = NULL;
|
||||
uint64_t offset;
|
||||
|
||||
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||
if (!mqd_mem_obj)
|
||||
return NULL;
|
||||
|
||||
offset = (q->sdma_engine_id *
|
||||
dev->device_info->num_sdma_queues_per_engine +
|
||||
q->sdma_queue_id) *
|
||||
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
|
||||
|
||||
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
|
||||
|
||||
mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
|
||||
+ offset);
|
||||
mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
|
||||
mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
|
||||
dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
|
||||
|
||||
return mqd_mem_obj;
|
||||
}
|
||||
|
||||
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
WARN_ON(!mqd_mem_obj->gtt_mem);
|
||||
kfree(mqd_mem_obj);
|
||||
}
|
||||
|
||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||
|
|
|
@ -99,8 +99,16 @@ struct mqd_manager {
|
|||
|
||||
struct mutex mqd_mutex;
|
||||
struct kfd_dev *dev;
|
||||
uint32_t mqd_size;
|
||||
};
|
||||
|
||||
struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev);
|
||||
|
||||
struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
|
||||
struct queue_properties *q);
|
||||
void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj);
|
||||
|
||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||
const uint32_t *cu_mask, uint32_t cu_mask_count,
|
||||
uint32_t *se_mask);
|
||||
|
|
|
@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
|||
m->compute_static_thread_mgmt_se3);
|
||||
}
|
||||
|
||||
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct kfd_mem_obj *mqd_mem_obj;
|
||||
|
||||
if (q->type == KFD_QUEUE_TYPE_HIQ)
|
||||
return allocate_hiq_mqd(kfd);
|
||||
|
||||
if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd),
|
||||
&mqd_mem_obj))
|
||||
return NULL;
|
||||
|
||||
return mqd_mem_obj;
|
||||
}
|
||||
|
||||
|
||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
|
@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
|
|||
uint64_t addr;
|
||||
struct cik_mqd *m;
|
||||
int retval;
|
||||
struct kfd_dev *kfd = mm->dev;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
|
||||
mqd_mem_obj);
|
||||
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_mqd(kfd, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
|
@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
|||
{
|
||||
int retval;
|
||||
struct cik_sdma_rlc_registers *m;
|
||||
struct kfd_dev *dev = mm->dev;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev,
|
||||
sizeof(struct cik_sdma_rlc_registers),
|
||||
mqd_mem_obj);
|
||||
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
|
||||
|
@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
|
|||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
|
||||
uint32_t queue_id, struct queue_properties *p,
|
||||
|
@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
|||
mqd->update_mqd = update_mqd;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct cik_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_HIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct cik_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct cik_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_SDMA:
|
||||
mqd->init_mqd = init_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd_sdma;
|
||||
mqd->update_mqd = update_mqd_sdma;
|
||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||
mqd->is_occupied = is_occupied_sdma;
|
||||
mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
||||
#endif
|
||||
|
|
|
@ -67,6 +67,43 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
|||
m->compute_static_thread_mgmt_se3);
|
||||
}
|
||||
|
||||
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
int retval;
|
||||
struct kfd_mem_obj *mqd_mem_obj = NULL;
|
||||
|
||||
if (q->type == KFD_QUEUE_TYPE_HIQ)
|
||||
return allocate_hiq_mqd(kfd);
|
||||
|
||||
/* From V9, for CWSR, the control stack is located on the next page
|
||||
* boundary after the mqd, we will use the gtt allocation function
|
||||
* instead of sub-allocation function.
|
||||
*/
|
||||
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
|
||||
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
|
||||
if (!mqd_mem_obj)
|
||||
return NULL;
|
||||
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
|
||||
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
|
||||
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
|
||||
&(mqd_mem_obj->gtt_mem),
|
||||
&(mqd_mem_obj->gpu_addr),
|
||||
(void *)&(mqd_mem_obj->cpu_ptr), true);
|
||||
} else {
|
||||
retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
|
||||
&mqd_mem_obj);
|
||||
}
|
||||
|
||||
if (retval) {
|
||||
kfree(mqd_mem_obj);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return mqd_mem_obj;
|
||||
|
||||
}
|
||||
|
||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
|
@ -76,24 +113,8 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
|
|||
struct v9_mqd *m;
|
||||
struct kfd_dev *kfd = mm->dev;
|
||||
|
||||
/* From V9, for CWSR, the control stack is located on the next page
|
||||
* boundary after the mqd, we will use the gtt allocation function
|
||||
* instead of sub-allocation function.
|
||||
*/
|
||||
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
|
||||
*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
|
||||
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
|
||||
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
|
||||
&((*mqd_mem_obj)->gtt_mem),
|
||||
&((*mqd_mem_obj)->gpu_addr),
|
||||
(void *)&((*mqd_mem_obj)->cpu_ptr), true);
|
||||
} else
|
||||
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
|
||||
mqd_mem_obj);
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_mqd(kfd, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
|
@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
|||
{
|
||||
int retval;
|
||||
struct v9_sdma_mqd *m;
|
||||
struct kfd_dev *dev = mm->dev;
|
||||
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev,
|
||||
sizeof(struct v9_sdma_mqd),
|
||||
mqd_mem_obj);
|
||||
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
|
@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
|||
return retval;
|
||||
}
|
||||
|
||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
struct queue_properties *p, struct mm_struct *mms)
|
||||
|
@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
|
|||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->get_wave_state = get_wave_state;
|
||||
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_HIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct v9_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_SDMA:
|
||||
mqd->init_mqd = init_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd_sdma;
|
||||
mqd->update_mqd = update_mqd_sdma;
|
||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||
mqd->is_occupied = is_occupied_sdma;
|
||||
mqd->mqd_size = sizeof(struct v9_sdma_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
||||
#endif
|
||||
|
|
|
@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
|||
m->compute_static_thread_mgmt_se3);
|
||||
}
|
||||
|
||||
static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
|
||||
struct queue_properties *q)
|
||||
{
|
||||
struct kfd_mem_obj *mqd_mem_obj;
|
||||
|
||||
if (q->type == KFD_QUEUE_TYPE_HIQ)
|
||||
return allocate_hiq_mqd(kfd);
|
||||
|
||||
if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
|
||||
&mqd_mem_obj))
|
||||
return NULL;
|
||||
|
||||
return mqd_mem_obj;
|
||||
}
|
||||
|
||||
static int init_mqd(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
|
@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
|
|||
int retval;
|
||||
uint64_t addr;
|
||||
struct vi_mqd *m;
|
||||
struct kfd_dev *kfd = mm->dev;
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
|
||||
mqd_mem_obj);
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_mqd(kfd, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
|
@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
|||
{
|
||||
int retval;
|
||||
struct vi_sdma_mqd *m;
|
||||
struct kfd_dev *dev = mm->dev;
|
||||
|
||||
|
||||
retval = kfd_gtt_sa_allocate(mm->dev,
|
||||
sizeof(struct vi_sdma_mqd),
|
||||
mqd_mem_obj);
|
||||
|
||||
if (retval != 0)
|
||||
*mqd_mem_obj = allocate_sdma_mqd(dev, q);
|
||||
if (!*mqd_mem_obj)
|
||||
return -ENOMEM;
|
||||
|
||||
m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
|
||||
|
@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
|||
memset(m, 0, sizeof(struct vi_sdma_mqd));
|
||||
|
||||
*mqd = m;
|
||||
if (gart_addr != NULL)
|
||||
if (gart_addr)
|
||||
*gart_addr = (*mqd_mem_obj)->gpu_addr;
|
||||
|
||||
retval = mm->update_mqd(mm, m, q);
|
||||
|
@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
|
|||
return retval;
|
||||
}
|
||||
|
||||
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj)
|
||||
{
|
||||
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
|
||||
}
|
||||
|
||||
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
struct queue_properties *p, struct mm_struct *mms)
|
||||
|
@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
|
|||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->get_wave_state = get_wave_state;
|
||||
mqd->mqd_size = sizeof(struct vi_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_HIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct vi_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_DIQ:
|
||||
mqd->init_mqd = init_mqd_hiq;
|
||||
mqd->uninit_mqd = uninit_mqd;
|
||||
mqd->load_mqd = load_mqd;
|
||||
mqd->update_mqd = update_mqd_hiq;
|
||||
mqd->destroy_mqd = destroy_mqd;
|
||||
mqd->is_occupied = is_occupied;
|
||||
mqd->mqd_size = sizeof(struct vi_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
break;
|
||||
case KFD_MQD_TYPE_SDMA:
|
||||
mqd->init_mqd = init_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_sdma;
|
||||
mqd->uninit_mqd = uninit_mqd_hiq_sdma;
|
||||
mqd->load_mqd = load_mqd_sdma;
|
||||
mqd->update_mqd = update_mqd_sdma;
|
||||
mqd->destroy_mqd = destroy_mqd_sdma;
|
||||
mqd->is_occupied = is_occupied_sdma;
|
||||
mqd->mqd_size = sizeof(struct vi_sdma_mqd);
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
|
||||
#endif
|
||||
|
|
|
@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
|
|||
|
||||
process_count = pm->dqm->processes_count;
|
||||
queue_count = pm->dqm->queue_count;
|
||||
compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
|
||||
compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
|
||||
pm->dqm->xgmi_sdma_queue_count;
|
||||
|
||||
/* check if there is over subscription
|
||||
* Note: the arbitration between the number of VMIDs and
|
||||
|
@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
|
|||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
pm->pmf = &kfd_vi_pm_funcs;
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
|
|
|
@ -176,8 +176,7 @@ struct pm4_mes_map_process {
|
|||
|
||||
union {
|
||||
struct {
|
||||
uint32_t num_gws:6;
|
||||
uint32_t reserved7:1;
|
||||
uint32_t num_gws:7;
|
||||
uint32_t sdma_enable:1;
|
||||
uint32_t num_oac:4;
|
||||
uint32_t reserved8:4;
|
||||
|
@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum {
|
|||
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
|
||||
};
|
||||
|
||||
/* Values for the 2-bit alloc_format field of struct pm4_mes_map_queues. */
enum mes_map_queues_alloc_format_enum {
	alloc_format__mes_map_queues__one_per_pipe_vi    = 0,
	alloc_format__mes_map_queues__all_on_one_pipe_vi = 1,
};
|
||||
|
||||
enum mes_map_queues_engine_sel_enum {
|
||||
engine_sel__mes_map_queues__compute_vi = 0,
|
||||
engine_sel__mes_map_queues__sdma0_vi = 2,
|
||||
|
@ -277,9 +271,11 @@ struct pm4_mes_map_queues {
|
|||
struct {
|
||||
uint32_t reserved1:4;
|
||||
enum mes_map_queues_queue_sel_enum queue_sel:2;
|
||||
uint32_t reserved2:15;
|
||||
uint32_t reserved5:6;
|
||||
uint32_t gws_control_queue:1;
|
||||
uint32_t reserved2:8;
|
||||
enum mes_map_queues_queue_type_enum queue_type:3;
|
||||
enum mes_map_queues_alloc_format_enum alloc_format:2;
|
||||
uint32_t reserved3:2;
|
||||
enum mes_map_queues_engine_sel_enum engine_sel:3;
|
||||
uint32_t num_queues:3;
|
||||
} bitfields2;
|
||||
|
|
|
@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum {
|
|||
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
|
||||
};
|
||||
|
||||
/* Values for the 2-bit alloc_format field of struct pm4_mes_map_queues. */
enum mes_map_queues_alloc_format_vi_enum {
	alloc_format__mes_map_queues__one_per_pipe_vi    = 0,
	alloc_format__mes_map_queues__all_on_one_pipe_vi = 1,
};
|
||||
|
||||
enum mes_map_queues_engine_sel_vi_enum {
|
||||
engine_sel__mes_map_queues__compute_vi = 0,
|
||||
engine_sel__mes_map_queues__sdma0_vi = 2,
|
||||
|
@ -240,7 +235,7 @@ struct pm4_mes_map_queues {
|
|||
enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
|
||||
uint32_t reserved2:15;
|
||||
enum mes_map_queues_queue_type_vi_enum queue_type:3;
|
||||
enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
|
||||
uint32_t reserved3:2;
|
||||
enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
|
||||
uint32_t num_queues:3;
|
||||
} bitfields2;
|
||||
|
|
|
@ -59,6 +59,7 @@
|
|||
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
|
||||
|
||||
#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
|
||||
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
|
||||
|
@ -160,6 +161,11 @@ extern int noretry;
|
|||
*/
|
||||
extern int halt_if_hws_hang;
|
||||
|
||||
/*
|
||||
* Whether the MEC firmware supports GWS barriers
|
||||
*/
|
||||
extern bool hws_gws_support;
|
||||
|
||||
enum cache_policy {
|
||||
cache_policy_coherent,
|
||||
cache_policy_noncoherent
|
||||
|
@ -188,6 +194,7 @@ struct kfd_device_info {
|
|||
bool needs_iommu_device;
|
||||
bool needs_pci_atomics;
|
||||
unsigned int num_sdma_engines;
|
||||
unsigned int num_xgmi_sdma_engines;
|
||||
unsigned int num_sdma_queues_per_engine;
|
||||
};
|
||||
|
||||
|
@ -258,7 +265,7 @@ struct kfd_dev {
|
|||
bool interrupts_active;
|
||||
|
||||
/* Debug manager */
|
||||
struct kfd_dbgmgr *dbgmgr;
|
||||
struct kfd_dbgmgr *dbgmgr;
|
||||
|
||||
/* Firmware versions */
|
||||
uint16_t mec_fw_version;
|
||||
|
@ -282,6 +289,9 @@ struct kfd_dev {
|
|||
|
||||
/* Compute Profile ref. count */
|
||||
atomic_t compute_profile;
|
||||
|
||||
/* Global GWS resource shared between processes */
|
||||
void *gws;
|
||||
};
|
||||
|
||||
enum kfd_mempool {
|
||||
|
@ -329,7 +339,8 @@ enum kfd_queue_type {
|
|||
KFD_QUEUE_TYPE_COMPUTE,
|
||||
KFD_QUEUE_TYPE_SDMA,
|
||||
KFD_QUEUE_TYPE_HIQ,
|
||||
KFD_QUEUE_TYPE_DIQ
|
||||
KFD_QUEUE_TYPE_DIQ,
|
||||
KFD_QUEUE_TYPE_SDMA_XGMI
|
||||
};
|
||||
|
||||
enum kfd_queue_format {
|
||||
|
@ -444,6 +455,9 @@ struct queue_properties {
|
|||
*
|
||||
* @device: The kfd device that created this queue.
|
||||
*
|
||||
* @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
|
||||
* otherwise.
|
||||
*
|
||||
* This structure represents user mode compute queues.
|
||||
* It contains all the necessary data to handle such queues.
|
||||
*
|
||||
|
@ -465,6 +479,7 @@ struct queue {
|
|||
|
||||
struct kfd_process *process;
|
||||
struct kfd_dev *device;
|
||||
void *gws;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -475,6 +490,7 @@ enum KFD_MQD_TYPE {
|
|||
KFD_MQD_TYPE_HIQ, /* for hiq */
|
||||
KFD_MQD_TYPE_CP, /* for cp queues and diq */
|
||||
KFD_MQD_TYPE_SDMA, /* for sdma queues */
|
||||
KFD_MQD_TYPE_DIQ, /* for diq */
|
||||
KFD_MQD_TYPE_MAX
|
||||
};
|
||||
|
||||
|
@ -819,8 +835,6 @@ void uninit_queue(struct queue *q);
|
|||
void print_queue_properties(struct queue_properties *q);
|
||||
void print_queue(struct queue *q);
|
||||
|
||||
struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
|
||||
struct kfd_dev *dev);
|
||||
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
|
||||
|
@ -859,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
|
|||
struct queue_properties *p);
|
||||
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
|
||||
struct queue_properties *p);
|
||||
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
|
||||
void *gws);
|
||||
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
|
||||
unsigned int qid);
|
||||
int pqm_get_wave_state(struct process_queue_manager *pqm,
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue