mirror of https://gitee.com/openkylin/linux.git
drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
The GRBM register interface is now capable of bursting 1 cycle per register wr->wr, wr->rd, much faster than the previous multicycle-per-transaction done interface. This has caused a problem where status registers requiring HW to update have a 1 cycle delay, due to the register update having to go through GRBM. For cp ucode, it has realized the dummy read in cp firmware. It covers the use of the WAIT_REG_MEM operation 1 case only. So it needs to call gfx_v10_0_wait_reg_mem in gfx10. Besides, it also needs to add a warning to update firmware in case the firmware is too old to have the function to realize the dummy read in cp firmware. For sdma ucode, it hasn't realized the dummy read in sdma firmware. sdma is moved to gfxhub in gfx10. So it needs to add a dummy read in the driver between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0. Signed-off-by: changzhu <Changfeng.Zhu@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
cf3fde893d
commit
a6522a5c63
|
@ -267,6 +267,7 @@ struct amdgpu_gfx {
|
||||||
uint32_t mec2_feature_version;
|
uint32_t mec2_feature_version;
|
||||||
bool mec_fw_write_wait;
|
bool mec_fw_write_wait;
|
||||||
bool me_fw_write_wait;
|
bool me_fw_write_wait;
|
||||||
|
bool cp_fw_write_wait;
|
||||||
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
|
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
|
||||||
unsigned num_gfx_rings;
|
unsigned num_gfx_rings;
|
||||||
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
|
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
|
||||||
|
|
|
@ -564,6 +564,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
|
||||||
kfree(adev->gfx.rlc.register_list_format);
|
kfree(adev->gfx.rlc.register_list_format);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
|
||||||
|
{
|
||||||
|
adev->gfx.cp_fw_write_wait = false;
|
||||||
|
|
||||||
|
switch (adev->asic_type) {
|
||||||
|
case CHIP_NAVI10:
|
||||||
|
case CHIP_NAVI12:
|
||||||
|
case CHIP_NAVI14:
|
||||||
|
if ((adev->gfx.me_fw_version >= 0x00000046) &&
|
||||||
|
(adev->gfx.me_feature_version >= 27) &&
|
||||||
|
(adev->gfx.pfp_fw_version >= 0x00000068) &&
|
||||||
|
(adev->gfx.pfp_feature_version >= 27) &&
|
||||||
|
(adev->gfx.mec_fw_version >= 0x0000005b) &&
|
||||||
|
(adev->gfx.mec_feature_version >= 27))
|
||||||
|
adev->gfx.cp_fw_write_wait = true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (adev->gfx.cp_fw_write_wait == false)
|
||||||
|
DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
|
||||||
|
GRBM requires 1-cycle delay in cp firmware\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
|
static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
|
||||||
{
|
{
|
||||||
const struct rlc_firmware_header_v2_1 *rlc_hdr;
|
const struct rlc_firmware_header_v2_1 *rlc_hdr;
|
||||||
|
@ -832,6 +858,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gfx_v10_0_check_fw_write_wait(adev);
|
||||||
out:
|
out:
|
||||||
if (err) {
|
if (err) {
|
||||||
dev_err(adev->dev,
|
dev_err(adev->dev,
|
||||||
|
@ -4766,6 +4793,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
|
||||||
gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
|
gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
|
||||||
|
uint32_t reg0, uint32_t reg1,
|
||||||
|
uint32_t ref, uint32_t mask)
|
||||||
|
{
|
||||||
|
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
|
||||||
|
struct amdgpu_device *adev = ring->adev;
|
||||||
|
bool fw_version_ok = false;
|
||||||
|
|
||||||
|
fw_version_ok = adev->gfx.cp_fw_write_wait;
|
||||||
|
|
||||||
|
if (fw_version_ok)
|
||||||
|
gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
|
||||||
|
ref, mask, 0x20);
|
||||||
|
else
|
||||||
|
amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
|
||||||
|
ref, mask);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
|
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
|
||||||
uint32_t me, uint32_t pipe,
|
uint32_t me, uint32_t pipe,
|
||||||
|
@ -5156,6 +5201,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
|
||||||
.emit_tmz = gfx_v10_0_ring_emit_tmz,
|
.emit_tmz = gfx_v10_0_ring_emit_tmz,
|
||||||
.emit_wreg = gfx_v10_0_ring_emit_wreg,
|
.emit_wreg = gfx_v10_0_ring_emit_wreg,
|
||||||
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
|
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
|
||||||
|
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
|
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
|
||||||
|
@ -5189,6 +5235,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
|
||||||
.pad_ib = amdgpu_ring_generic_pad_ib,
|
.pad_ib = amdgpu_ring_generic_pad_ib,
|
||||||
.emit_wreg = gfx_v10_0_ring_emit_wreg,
|
.emit_wreg = gfx_v10_0_ring_emit_wreg,
|
||||||
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
|
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
|
||||||
|
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
|
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
|
||||||
|
@ -5219,6 +5266,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
|
||||||
.emit_rreg = gfx_v10_0_ring_emit_rreg,
|
.emit_rreg = gfx_v10_0_ring_emit_rreg,
|
||||||
.emit_wreg = gfx_v10_0_ring_emit_wreg,
|
.emit_wreg = gfx_v10_0_ring_emit_wreg,
|
||||||
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
|
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
|
||||||
|
.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
|
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
|
||||||
|
|
|
@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
|
||||||
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
|
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
|
||||||
upper_32_bits(pd_addr));
|
upper_32_bits(pd_addr));
|
||||||
|
|
||||||
amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
|
amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
|
||||||
|
hub->vm_inv_eng0_ack + eng,
|
||||||
/* wait for the invalidate to complete */
|
req, 1 << vmid);
|
||||||
amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
|
|
||||||
1 << vmid, 1 << vmid);
|
|
||||||
|
|
||||||
return pd_addr;
|
return pd_addr;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1173,6 +1173,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
|
||||||
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
|
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
|
||||||
|
uint32_t reg0, uint32_t reg1,
|
||||||
|
uint32_t ref, uint32_t mask)
|
||||||
|
{
|
||||||
|
amdgpu_ring_emit_wreg(ring, reg0, ref);
|
||||||
|
/* wait for a cycle to reset vm_inv_eng*_ack */
|
||||||
|
amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
|
||||||
|
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
|
||||||
|
}
|
||||||
|
|
||||||
static int sdma_v5_0_early_init(void *handle)
|
static int sdma_v5_0_early_init(void *handle)
|
||||||
{
|
{
|
||||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||||
|
@ -1588,7 +1598,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
|
||||||
6 + /* sdma_v5_0_ring_emit_pipeline_sync */
|
6 + /* sdma_v5_0_ring_emit_pipeline_sync */
|
||||||
/* sdma_v5_0_ring_emit_vm_flush */
|
/* sdma_v5_0_ring_emit_vm_flush */
|
||||||
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
|
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
|
||||||
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
|
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
|
||||||
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
|
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
|
||||||
.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
|
.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
|
||||||
.emit_ib = sdma_v5_0_ring_emit_ib,
|
.emit_ib = sdma_v5_0_ring_emit_ib,
|
||||||
|
@ -1602,6 +1612,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
|
||||||
.pad_ib = sdma_v5_0_ring_pad_ib,
|
.pad_ib = sdma_v5_0_ring_pad_ib,
|
||||||
.emit_wreg = sdma_v5_0_ring_emit_wreg,
|
.emit_wreg = sdma_v5_0_ring_emit_wreg,
|
||||||
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
|
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
|
||||||
|
.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
|
||||||
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
|
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
|
||||||
.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
|
.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
|
||||||
.preempt_ib = sdma_v5_0_ring_preempt_ib,
|
.preempt_ib = sdma_v5_0_ring_preempt_ib,
|
||||||
|
|
Loading…
Reference in New Issue