mirror of https://gitee.com/openkylin/linux.git
drm/amdgpu: revise RLCG access path
what changed: 1)provide new implementation interface for the rlcg access path 2)put SQ_CMD/SQ_IND_INDEX to GFX9 RLCG path to let debugfs's reg_op function can access reg that need RLCG path help now even debugfs's reg_op can used to dump wave. tested-by: Monk Liu <monk.liu@amd.com> tested-by: Zhou pengju <pengju.zhou@amd.com> Signed-off-by: Zhou pengju <pengju.zhou@amd.com> Signed-off-by: Monk Liu <Monk.Liu@amd.com> Reviewed-by: Emily Deng <Emily.Deng@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
a7fbb630c5
commit
2e0cc4d48b
|
@ -994,6 +994,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
|
|||
uint32_t acc_flags);
|
||||
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
|
||||
uint32_t acc_flags);
|
||||
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
|
||||
uint32_t acc_flags);
|
||||
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
|
||||
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
|
||||
|
||||
|
|
|
@ -179,7 +179,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
|
|||
} else {
|
||||
r = get_user(value, (uint32_t *)buf);
|
||||
if (!r)
|
||||
WREG32(*pos >> 2, value);
|
||||
amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0);
|
||||
}
|
||||
if (r) {
|
||||
result = r;
|
||||
|
|
|
@ -306,28 +306,10 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
|
|||
BUG();
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_mm_wreg - write to a memory mapped IO register
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @reg: dword aligned register offset
|
||||
* @v: 32 bit value to write to the register
|
||||
* @acc_flags: access flags which require special behavior
|
||||
*
|
||||
* Writes the value specified to the offset specified.
|
||||
*/
|
||||
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
|
||||
uint32_t acc_flags)
|
||||
void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
|
||||
{
|
||||
trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
|
||||
|
||||
if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
|
||||
adev->last_mm_index = v;
|
||||
}
|
||||
|
||||
if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
|
||||
return amdgpu_kiq_wreg(adev, reg, v);
|
||||
|
||||
if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
|
||||
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
|
||||
else {
|
||||
|
@ -344,6 +326,48 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_mm_wreg - write to a memory mapped IO register
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @reg: dword aligned register offset
|
||||
* @v: 32 bit value to write to the register
|
||||
* @acc_flags: access flags which require special behavior
|
||||
*
|
||||
* Writes the value specified to the offset specified.
|
||||
*/
|
||||
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
|
||||
uint32_t acc_flags)
|
||||
{
|
||||
if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
|
||||
adev->last_mm_index = v;
|
||||
}
|
||||
|
||||
if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
|
||||
return amdgpu_kiq_wreg(adev, reg, v);
|
||||
|
||||
amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
|
||||
*
|
||||
* this function is invoked only the debugfs register access
|
||||
* */
|
||||
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
|
||||
uint32_t acc_flags)
|
||||
{
|
||||
if (amdgpu_sriov_fullaccess(adev) &&
|
||||
adev->gfx.rlc.funcs &&
|
||||
adev->gfx.rlc.funcs->is_rlcg_access_range) {
|
||||
|
||||
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
|
||||
return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
|
||||
}
|
||||
|
||||
amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_io_rreg - read an IO register
|
||||
*
|
||||
|
|
|
@ -127,6 +127,8 @@ struct amdgpu_rlc_funcs {
|
|||
void (*reset)(struct amdgpu_device *adev);
|
||||
void (*start)(struct amdgpu_device *adev);
|
||||
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
|
||||
void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
|
||||
bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
|
||||
};
|
||||
|
||||
struct amdgpu_rlc {
|
||||
|
|
|
@ -270,6 +270,9 @@ struct amdgpu_virt {
|
|||
#define amdgpu_sriov_runtime(adev) \
|
||||
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_RUNTIME)
|
||||
|
||||
#define amdgpu_sriov_fullaccess(adev) \
|
||||
(amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev)))
|
||||
|
||||
#define amdgpu_passthrough(adev) \
|
||||
((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
|
||||
|
||||
|
|
|
@ -224,6 +224,49 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
|
|||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
|
||||
};
|
||||
|
||||
static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
|
||||
{
|
||||
static void *scratch_reg0;
|
||||
static void *scratch_reg1;
|
||||
static void *scratch_reg2;
|
||||
static void *scratch_reg3;
|
||||
static void *spare_int;
|
||||
static uint32_t grbm_cntl;
|
||||
static uint32_t grbm_idx;
|
||||
uint32_t i = 0;
|
||||
uint32_t retries = 50000;
|
||||
|
||||
scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
|
||||
scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
|
||||
scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
|
||||
scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
|
||||
spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
|
||||
|
||||
grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
|
||||
grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
|
||||
|
||||
if (amdgpu_sriov_runtime(adev)) {
|
||||
pr_err("shoudn't call rlcg write register during runtime\n");
|
||||
return;
|
||||
}
|
||||
|
||||
writel(v, scratch_reg0);
|
||||
writel(offset | 0x80000000, scratch_reg1);
|
||||
writel(1, spare_int);
|
||||
for (i = 0; i < retries; i++) {
|
||||
u32 tmp;
|
||||
|
||||
tmp = readl(scratch_reg1);
|
||||
if (!(tmp & 0x80000000))
|
||||
break;
|
||||
|
||||
udelay(10);
|
||||
}
|
||||
|
||||
if (i >= retries)
|
||||
pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
|
||||
}
|
||||
|
||||
static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
|
||||
{
|
||||
/* Pending on emulation bring up */
|
||||
|
@ -4247,6 +4290,33 @@ static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
|
|||
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
|
||||
}
|
||||
|
||||
static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
|
||||
uint32_t offset,
|
||||
struct soc15_reg_rlcg *entries, int arr_size)
|
||||
{
|
||||
int i;
|
||||
uint32_t reg;
|
||||
|
||||
if (!entries)
|
||||
return false;
|
||||
|
||||
for (i = 0; i < arr_size; i++) {
|
||||
const struct soc15_reg_rlcg *entry;
|
||||
|
||||
entry = &entries[i];
|
||||
reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
|
||||
if (offset == reg)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
|
||||
{
|
||||
return gfx_v10_0_check_rlcg_range(adev, offset, NULL, 0);
|
||||
}
|
||||
|
||||
static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
|
||||
.is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
|
||||
.set_safe_mode = gfx_v10_0_set_safe_mode,
|
||||
|
@ -4258,7 +4328,9 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
|
|||
.stop = gfx_v10_0_rlc_stop,
|
||||
.reset = gfx_v10_0_rlc_reset,
|
||||
.start = gfx_v10_0_rlc_start,
|
||||
.update_spm_vmid = gfx_v10_0_update_spm_vmid
|
||||
.update_spm_vmid = gfx_v10_0_update_spm_vmid,
|
||||
.rlcg_wreg = gfx_v10_rlcg_wreg,
|
||||
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
|
||||
};
|
||||
|
||||
static int gfx_v10_0_set_powergating_state(void *handle,
|
||||
|
|
|
@ -697,6 +697,11 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
|
|||
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
|
||||
};
|
||||
|
||||
static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
|
||||
{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
|
||||
{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
|
||||
};
|
||||
|
||||
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
|
||||
{
|
||||
mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
|
||||
|
@ -721,6 +726,63 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
|
|||
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
|
||||
};
|
||||
|
||||
void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
|
||||
{
|
||||
static void *scratch_reg0;
|
||||
static void *scratch_reg1;
|
||||
static void *scratch_reg2;
|
||||
static void *scratch_reg3;
|
||||
static void *spare_int;
|
||||
static uint32_t grbm_cntl;
|
||||
static uint32_t grbm_idx;
|
||||
bool shadow;
|
||||
|
||||
scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
|
||||
scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
|
||||
scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
|
||||
scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
|
||||
spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
|
||||
|
||||
grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
|
||||
grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
|
||||
|
||||
if (amdgpu_sriov_runtime(adev)) {
|
||||
pr_err("shoudn't call rlcg write register during runtime\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (offset == grbm_cntl || offset == grbm_idx)
|
||||
shadow = true;
|
||||
|
||||
if (shadow) {
|
||||
if (offset == grbm_cntl)
|
||||
writel(v, scratch_reg2);
|
||||
else if (offset == grbm_idx)
|
||||
writel(v, scratch_reg3);
|
||||
|
||||
writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
|
||||
} else {
|
||||
uint32_t i = 0;
|
||||
uint32_t retries = 50000;
|
||||
|
||||
writel(v, scratch_reg0);
|
||||
writel(offset | 0x80000000, scratch_reg1);
|
||||
writel(1, spare_int);
|
||||
for (i = 0; i < retries; i++) {
|
||||
u32 tmp;
|
||||
|
||||
tmp = readl(scratch_reg1);
|
||||
if (!(tmp & 0x80000000))
|
||||
break;
|
||||
|
||||
udelay(10);
|
||||
}
|
||||
if (i >= retries)
|
||||
pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
|
||||
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
|
||||
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
|
||||
|
@ -1921,7 +1983,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
|
|||
|
||||
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
|
||||
{
|
||||
WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
|
||||
WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
|
||||
(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
|
||||
(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
|
||||
(address << SQ_IND_INDEX__INDEX__SHIFT) |
|
||||
|
@ -1933,7 +1995,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
|
|||
uint32_t wave, uint32_t thread,
|
||||
uint32_t regno, uint32_t num, uint32_t *out)
|
||||
{
|
||||
WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
|
||||
WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
|
||||
(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
|
||||
(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
|
||||
(regno << SQ_IND_INDEX__INDEX__SHIFT) |
|
||||
|
@ -4908,6 +4970,35 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
|
|||
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
|
||||
}
|
||||
|
||||
static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
|
||||
uint32_t offset,
|
||||
struct soc15_reg_rlcg *entries, int arr_size)
|
||||
{
|
||||
int i;
|
||||
uint32_t reg;
|
||||
|
||||
if (!entries)
|
||||
return false;
|
||||
|
||||
for (i = 0; i < arr_size; i++) {
|
||||
const struct soc15_reg_rlcg *entry;
|
||||
|
||||
entry = &entries[i];
|
||||
reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
|
||||
if (offset == reg)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
|
||||
{
|
||||
return gfx_v9_0_check_rlcg_range(adev, offset,
|
||||
(void *)rlcg_access_gc_9_0,
|
||||
ARRAY_SIZE(rlcg_access_gc_9_0));
|
||||
}
|
||||
|
||||
static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
|
||||
.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
|
||||
.set_safe_mode = gfx_v9_0_set_safe_mode,
|
||||
|
@ -4920,7 +5011,9 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
|
|||
.stop = gfx_v9_0_rlc_stop,
|
||||
.reset = gfx_v9_0_rlc_reset,
|
||||
.start = gfx_v9_0_rlc_start,
|
||||
.update_spm_vmid = gfx_v9_0_update_spm_vmid
|
||||
.update_spm_vmid = gfx_v9_0_update_spm_vmid,
|
||||
.rlcg_wreg = gfx_v9_0_rlcg_wreg,
|
||||
.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
|
||||
};
|
||||
|
||||
static int gfx_v9_0_set_powergating_state(void *handle,
|
||||
|
|
|
@ -42,6 +42,13 @@ struct soc15_reg_golden {
|
|||
u32 or_mask;
|
||||
};
|
||||
|
||||
struct soc15_reg_rlcg {
|
||||
u32 hwip;
|
||||
u32 instance;
|
||||
u32 segment;
|
||||
u32 reg;
|
||||
};
|
||||
|
||||
struct soc15_reg_entry {
|
||||
uint32_t hwip;
|
||||
uint32_t inst;
|
||||
|
|
|
@ -70,10 +70,9 @@
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a)))
|
||||
#define WREG32_RLC(reg, value) \
|
||||
do { \
|
||||
if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \
|
||||
if (amdgpu_sriov_fullaccess(adev)) { \
|
||||
uint32_t i = 0; \
|
||||
uint32_t retries = 50000; \
|
||||
uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
|
||||
|
@ -98,7 +97,7 @@
|
|||
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
|
||||
do { \
|
||||
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
|
||||
if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \
|
||||
if (amdgpu_sriov_fullaccess(adev)) { \
|
||||
uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
|
||||
uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
|
||||
uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
|
||||
|
|
Loading…
Reference in New Issue