drm/amd/gfx: add instance field to select_se_sh (v3)

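Add the ability to specify an instance in the select_se_sh callback.
A value of 0xffffffff selects broadcast to all instances; any other value
is written to the INSTANCE_INDEX field. All existing callers across the
driver pass 0xffffffff.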

(v2) Don't enable INSTANCE_BROADCAST by default
(v3) Style changes

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Tom St Denis 2016-06-28 10:26:48 -04:00 committed by Alex Deucher
parent b636a1b3d6
commit 9559ef5b12
5 changed files with 39 additions and 29 deletions

View File

@ -1159,7 +1159,7 @@ struct amdgpu_cu_info {
struct amdgpu_gfx_funcs { struct amdgpu_gfx_funcs {
/* get the gpu clock counter */ /* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num); void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
}; };
struct amdgpu_gfx { struct amdgpu_gfx {
@ -2289,7 +2289,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev)) #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
#define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e)) #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
#define amdgpu_gfx_select_se_sh(adev, se, sh) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh)) #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
#define amdgpu_dpm_get_temperature(adev) \ #define amdgpu_dpm_get_temperature(adev) \
((adev)->pp_enabled ? \ ((adev)->pp_enabled ? \

View File

@ -1035,12 +1035,12 @@ static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff) if (se_num != 0xffffffff || sh_num != 0xffffffff)
amdgpu_gfx_select_se_sh(adev, se_num, sh_num); amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
val = RREG32(reg_offset); val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff) if (se_num != 0xffffffff || sh_num != 0xffffffff)
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff); amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
return val; return val;
} }

View File

@ -1584,9 +1584,14 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
* broadcast to all SEs or SHs (CIK). * broadcast to all SEs or SHs (CIK).
*/ */
static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
u32 se_num, u32 sh_num) u32 se_num, u32 sh_num, u32 instance)
{ {
u32 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK; u32 data;
if (instance == 0xffffffff)
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
else
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
@ -1660,13 +1665,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v7_0_select_se_sh(adev, i, j); gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
data = gfx_v7_0_get_rb_active_bitmap(adev); data = gfx_v7_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh); rb_bitmap_width_per_sh);
} }
} }
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.backend_enable_mask = active_rbs;
@ -1747,7 +1752,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted * making sure that the following register writes will be broadcasted
* to all the shaders * to all the shaders
*/ */
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/* XXX SH_MEM regs */ /* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */ /* where to put LDS, scratch, GPUVM in FSA64 space */
@ -3381,7 +3386,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v7_0_select_se_sh(adev, i, j); gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
for (k = 0; k < adev->usec_timeout; k++) { for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break; break;
@ -3389,7 +3394,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
} }
} }
} }
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@ -3549,7 +3554,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
WREG32(mmRLC_LB_CNTR_MAX, 0x00008000); WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff); WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(mmRLC_LB_PARAMS, 0x00600408); WREG32(mmRLC_LB_PARAMS, 0x00600408);
WREG32(mmRLC_LB_CNTL, 0x80000004); WREG32(mmRLC_LB_CNTL, 0x80000004);
@ -3589,7 +3594,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev); tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@ -3640,7 +3645,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev); tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@ -3691,7 +3696,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
tmp = gfx_v7_0_halt_rlc(adev); tmp = gfx_v7_0_halt_rlc(adev);
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK; data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
@ -5072,7 +5077,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
mask = 1; mask = 1;
ao_bitmap = 0; ao_bitmap = 0;
counter = 0; counter = 0;
gfx_v7_0_select_se_sh(adev, i, j); gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
if (i < 4 && j < 2) if (i < 4 && j < 2)
gfx_v7_0_set_user_cu_inactive_bitmap( gfx_v7_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]); adev, disable_masks[i * 2 + j]);
@ -5091,7 +5096,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
} }
} }
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number; cu_info->number = active_cu_number;

View File

@ -3447,9 +3447,14 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
} }
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
u32 se_num, u32 sh_num) u32 se_num, u32 sh_num, u32 instance)
{ {
u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); u32 data;
if (instance == 0xffffffff)
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
else
data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) { if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
@ -3499,13 +3504,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v8_0_select_se_sh(adev, i, j); gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
data = gfx_v8_0_get_rb_active_bitmap(adev); data = gfx_v8_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh); rb_bitmap_width_per_sh);
} }
} }
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.backend_enable_mask = active_rbs;
@ -3609,7 +3614,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted * making sure that the following register writes will be broadcasted
* to all the shaders * to all the shaders
*/ */
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmPA_SC_FIFO_SIZE, WREG32(mmPA_SC_FIFO_SIZE,
(adev->gfx.config.sc_prim_fifo_size_frontend << (adev->gfx.config.sc_prim_fifo_size_frontend <<
@ -3632,7 +3637,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v8_0_select_se_sh(adev, i, j); gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
for (k = 0; k < adev->usec_timeout; k++) { for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break; break;
@ -3640,7 +3645,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
} }
} }
} }
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@ -5409,7 +5414,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
{ {
uint32_t data; uint32_t data;
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
@ -6518,7 +6523,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
mask = 1; mask = 1;
ao_bitmap = 0; ao_bitmap = 0;
counter = 0; counter = 0;
gfx_v8_0_select_se_sh(adev, i, j); gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
if (i < 4 && j < 2) if (i < 4 && j < 2)
gfx_v8_0_set_user_cu_inactive_bitmap( gfx_v8_0_set_user_cu_inactive_bitmap(
adev, disable_masks[i * 2 + j]); adev, disable_masks[i * 2 + j]);
@ -6537,7 +6542,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
} }
} }
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number; cu_info->number = active_cu_number;

View File

@ -533,12 +533,12 @@ static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
if (se_num != 0xffffffff || sh_num != 0xffffffff) if (se_num != 0xffffffff || sh_num != 0xffffffff)
amdgpu_gfx_select_se_sh(adev, se_num, sh_num); amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
val = RREG32(reg_offset); val = RREG32(reg_offset);
if (se_num != 0xffffffff || sh_num != 0xffffffff) if (se_num != 0xffffffff || sh_num != 0xffffffff)
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff); amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
return val; return val;
} }