drm/i915/vlv: WA for Turbo and RC6 to work together.
With RC6 enabled, BYT has a HW issue in determining the right Gfx busyness. WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide on increasing/decreasing the freq. This logic monitors the C0 counters of the render/media power wells over the EI period and takes the necessary action based on these values. v2: Refactor duplicate code. (Ville) v3: Reformat the comments. (Ville) v4: Enable required counters and remove unwanted code. (Ville) v5: Added frequency change acceleration support and removed kernel-doc style comments. (Ville) v6: Updated comment section and fixed w/a comment. (Ville) Signed-off-by: Deepak S <deepak.s@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
parent
8e09bf837f
commit
31685c258e
|
@ -902,6 +902,12 @@ struct vlv_s0ix_state {
|
||||||
u32 clock_gate_dis2;
|
u32 clock_gate_dis2;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct intel_rps_ei_calc {
|
||||||
|
u32 cz_ts_ei;
|
||||||
|
u32 render_ei_c0;
|
||||||
|
u32 media_ei_c0;
|
||||||
|
};
|
||||||
|
|
||||||
struct intel_gen6_power_mgmt {
|
struct intel_gen6_power_mgmt {
|
||||||
/* work and pm_iir are protected by dev_priv->irq_lock */
|
/* work and pm_iir are protected by dev_priv->irq_lock */
|
||||||
struct work_struct work;
|
struct work_struct work;
|
||||||
|
@ -926,6 +932,8 @@ struct intel_gen6_power_mgmt {
|
||||||
u8 rp1_freq; /* "less than" RP0 power/freqency */
|
u8 rp1_freq; /* "less than" RP0 power/freqency */
|
||||||
u8 rp0_freq; /* Non-overclocked max frequency. */
|
u8 rp0_freq; /* Non-overclocked max frequency. */
|
||||||
|
|
||||||
|
u32 ei_interrupt_count;
|
||||||
|
|
||||||
int last_adj;
|
int last_adj;
|
||||||
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
|
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
|
||||||
|
|
||||||
|
@ -1527,6 +1535,13 @@ struct drm_i915_private {
|
||||||
/* gen6+ rps state */
|
/* gen6+ rps state */
|
||||||
struct intel_gen6_power_mgmt rps;
|
struct intel_gen6_power_mgmt rps;
|
||||||
|
|
||||||
|
/* rps wa up ei calculation */
|
||||||
|
struct intel_rps_ei_calc rps_up_ei;
|
||||||
|
|
||||||
|
/* rps wa down ei calculation */
|
||||||
|
struct intel_rps_ei_calc rps_down_ei;
|
||||||
|
|
||||||
|
|
||||||
/* ilk-only ips/rps state. Everything in here is protected by the global
|
/* ilk-only ips/rps state. Everything in here is protected by the global
|
||||||
* mchdev_lock in intel_pm.c */
|
* mchdev_lock in intel_pm.c */
|
||||||
struct intel_ilk_power_mgmt ips;
|
struct intel_ilk_power_mgmt ips;
|
||||||
|
|
|
@ -1272,6 +1272,131 @@ static void notify_ring(struct drm_device *dev,
|
||||||
i915_queue_hangcheck(dev);
|
i915_queue_hangcheck(dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
|
||||||
|
struct intel_rps_ei_calc *rps_ei)
|
||||||
|
{
|
||||||
|
u32 cz_ts, cz_freq_khz;
|
||||||
|
u32 render_count, media_count;
|
||||||
|
u32 elapsed_render, elapsed_media, elapsed_time;
|
||||||
|
u32 residency = 0;
|
||||||
|
|
||||||
|
cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
|
||||||
|
cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
|
||||||
|
|
||||||
|
render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
|
||||||
|
media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
|
||||||
|
|
||||||
|
if (rps_ei->cz_ts_ei == 0) {
|
||||||
|
rps_ei->cz_ts_ei = cz_ts;
|
||||||
|
rps_ei->render_ei_c0 = render_count;
|
||||||
|
rps_ei->media_ei_c0 = media_count;
|
||||||
|
|
||||||
|
return dev_priv->rps.cur_freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
elapsed_time = cz_ts - rps_ei->cz_ts_ei;
|
||||||
|
rps_ei->cz_ts_ei = cz_ts;
|
||||||
|
|
||||||
|
elapsed_render = render_count - rps_ei->render_ei_c0;
|
||||||
|
rps_ei->render_ei_c0 = render_count;
|
||||||
|
|
||||||
|
elapsed_media = media_count - rps_ei->media_ei_c0;
|
||||||
|
rps_ei->media_ei_c0 = media_count;
|
||||||
|
|
||||||
|
/* Convert all the counters into common unit of milli sec */
|
||||||
|
elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
|
||||||
|
elapsed_render /= cz_freq_khz;
|
||||||
|
elapsed_media /= cz_freq_khz;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Calculate overall C0 residency percentage
|
||||||
|
* only if elapsed time is non zero
|
||||||
|
*/
|
||||||
|
if (elapsed_time) {
|
||||||
|
residency =
|
||||||
|
((max(elapsed_render, elapsed_media) * 100)
|
||||||
|
/ elapsed_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
return residency;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
|
||||||
|
* busy-ness calculated from C0 counters of render & media power wells
|
||||||
|
* @dev_priv: DRM device private
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
|
||||||
|
{
|
||||||
|
u32 residency_C0_up = 0, residency_C0_down = 0;
|
||||||
|
u8 new_delay, adj;
|
||||||
|
|
||||||
|
dev_priv->rps.ei_interrupt_count++;
|
||||||
|
|
||||||
|
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
|
||||||
|
|
||||||
|
|
||||||
|
if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
|
||||||
|
vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
|
||||||
|
vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
|
||||||
|
return dev_priv->rps.cur_freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To down throttle, C0 residency should be less than down threshold
|
||||||
|
* for continous EI intervals. So calculate down EI counters
|
||||||
|
* once in VLV_INT_COUNT_FOR_DOWN_EI
|
||||||
|
*/
|
||||||
|
if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
|
||||||
|
|
||||||
|
dev_priv->rps.ei_interrupt_count = 0;
|
||||||
|
|
||||||
|
residency_C0_down = vlv_c0_residency(dev_priv,
|
||||||
|
&dev_priv->rps_down_ei);
|
||||||
|
} else {
|
||||||
|
residency_C0_up = vlv_c0_residency(dev_priv,
|
||||||
|
&dev_priv->rps_up_ei);
|
||||||
|
}
|
||||||
|
|
||||||
|
new_delay = dev_priv->rps.cur_freq;
|
||||||
|
|
||||||
|
adj = dev_priv->rps.last_adj;
|
||||||
|
/* C0 residency is greater than UP threshold. Increase Frequency */
|
||||||
|
if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
|
||||||
|
if (adj > 0)
|
||||||
|
adj *= 2;
|
||||||
|
else
|
||||||
|
adj = 1;
|
||||||
|
|
||||||
|
if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
|
||||||
|
new_delay = dev_priv->rps.cur_freq + adj;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For better performance, jump directly
|
||||||
|
* to RPe if we're below it.
|
||||||
|
*/
|
||||||
|
if (new_delay < dev_priv->rps.efficient_freq)
|
||||||
|
new_delay = dev_priv->rps.efficient_freq;
|
||||||
|
|
||||||
|
} else if (!dev_priv->rps.ei_interrupt_count &&
|
||||||
|
(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
|
||||||
|
if (adj < 0)
|
||||||
|
adj *= 2;
|
||||||
|
else
|
||||||
|
adj = -1;
|
||||||
|
/*
|
||||||
|
* This means, C0 residency is less than down threshold over
|
||||||
|
* a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
|
||||||
|
*/
|
||||||
|
if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
|
||||||
|
new_delay = dev_priv->rps.cur_freq + adj;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new_delay;
|
||||||
|
}
|
||||||
|
|
||||||
static void gen6_pm_rps_work(struct work_struct *work)
|
static void gen6_pm_rps_work(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct drm_i915_private *dev_priv =
|
struct drm_i915_private *dev_priv =
|
||||||
|
@ -1320,6 +1445,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
|
||||||
else
|
else
|
||||||
new_delay = dev_priv->rps.min_freq_softlimit;
|
new_delay = dev_priv->rps.min_freq_softlimit;
|
||||||
adj = 0;
|
adj = 0;
|
||||||
|
} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
|
||||||
|
new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
|
||||||
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
|
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
|
||||||
if (adj < 0)
|
if (adj < 0)
|
||||||
adj *= 2;
|
adj *= 2;
|
||||||
|
@ -4511,7 +4638,11 @@ void intel_irq_init(struct drm_device *dev)
|
||||||
INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
|
INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
|
||||||
|
|
||||||
/* Let's track the enabled rps events */
|
/* Let's track the enabled rps events */
|
||||||
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
|
if (IS_VALLEYVIEW(dev))
|
||||||
|
/* WaGsvRC0ResidencyMethod:VLV */
|
||||||
|
dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
|
||||||
|
else
|
||||||
|
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
|
||||||
|
|
||||||
setup_timer(&dev_priv->gpu_error.hangcheck_timer,
|
setup_timer(&dev_priv->gpu_error.hangcheck_timer,
|
||||||
i915_hangcheck_elapsed,
|
i915_hangcheck_elapsed,
|
||||||
|
|
|
@ -531,6 +531,7 @@ enum punit_power_well {
|
||||||
#define PUNIT_REG_GPU_FREQ_STS 0xd8
|
#define PUNIT_REG_GPU_FREQ_STS 0xd8
|
||||||
#define GENFREQSTATUS (1<<0)
|
#define GENFREQSTATUS (1<<0)
|
||||||
#define PUNIT_REG_MEDIA_TURBO_FREQ_REQ 0xdc
|
#define PUNIT_REG_MEDIA_TURBO_FREQ_REQ 0xdc
|
||||||
|
#define PUNIT_REG_CZ_TIMESTAMP 0xce
|
||||||
|
|
||||||
#define PUNIT_FUSE_BUS2 0xf6 /* bits 47:40 */
|
#define PUNIT_FUSE_BUS2 0xf6 /* bits 47:40 */
|
||||||
#define PUNIT_FUSE_BUS1 0xf5 /* bits 55:48 */
|
#define PUNIT_FUSE_BUS1 0xf5 /* bits 55:48 */
|
||||||
|
@ -556,6 +557,11 @@ enum punit_power_well {
|
||||||
#define FB_FMAX_VMIN_FREQ_LO_SHIFT 27
|
#define FB_FMAX_VMIN_FREQ_LO_SHIFT 27
|
||||||
#define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
|
#define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
|
||||||
|
|
||||||
|
#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
|
||||||
|
#define VLV_RP_UP_EI_THRESHOLD 90
|
||||||
|
#define VLV_RP_DOWN_EI_THRESHOLD 70
|
||||||
|
#define VLV_INT_COUNT_FOR_DOWN_EI 5
|
||||||
|
|
||||||
/* vlv2 north clock has */
|
/* vlv2 north clock has */
|
||||||
#define CCK_FUSE_REG 0x8
|
#define CCK_FUSE_REG 0x8
|
||||||
#define CCK_FUSE_HPLL_FREQ_MASK 0x3
|
#define CCK_FUSE_HPLL_FREQ_MASK 0x3
|
||||||
|
@ -5394,6 +5400,7 @@ enum punit_power_well {
|
||||||
#define VLV_GTLC_ALLOWWAKEERR (1 << 1)
|
#define VLV_GTLC_ALLOWWAKEERR (1 << 1)
|
||||||
#define VLV_GTLC_PW_MEDIA_STATUS_MASK (1 << 5)
|
#define VLV_GTLC_PW_MEDIA_STATUS_MASK (1 << 5)
|
||||||
#define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7)
|
#define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7)
|
||||||
|
#define VLV_GTLC_SURVIVABILITY_REG 0x130098
|
||||||
#define FORCEWAKE_MT 0xa188 /* multi-threaded */
|
#define FORCEWAKE_MT 0xa188 /* multi-threaded */
|
||||||
#define FORCEWAKE_KERNEL 0x1
|
#define FORCEWAKE_KERNEL 0x1
|
||||||
#define FORCEWAKE_USER 0x2
|
#define FORCEWAKE_USER 0x2
|
||||||
|
@ -5541,6 +5548,8 @@ enum punit_power_well {
|
||||||
#define GEN6_GT_GFX_RC6_LOCKED 0x138104
|
#define GEN6_GT_GFX_RC6_LOCKED 0x138104
|
||||||
#define VLV_COUNTER_CONTROL 0x138104
|
#define VLV_COUNTER_CONTROL 0x138104
|
||||||
#define VLV_COUNT_RANGE_HIGH (1<<15)
|
#define VLV_COUNT_RANGE_HIGH (1<<15)
|
||||||
|
#define VLV_MEDIA_RC0_COUNT_EN (1<<5)
|
||||||
|
#define VLV_RENDER_RC0_COUNT_EN (1<<4)
|
||||||
#define VLV_MEDIA_RC6_COUNT_EN (1<<1)
|
#define VLV_MEDIA_RC6_COUNT_EN (1<<1)
|
||||||
#define VLV_RENDER_RC6_COUNT_EN (1<<0)
|
#define VLV_RENDER_RC6_COUNT_EN (1<<0)
|
||||||
#define GEN6_GT_GFX_RC6 0x138108
|
#define GEN6_GT_GFX_RC6 0x138108
|
||||||
|
@ -5549,6 +5558,8 @@ enum punit_power_well {
|
||||||
|
|
||||||
#define GEN6_GT_GFX_RC6p 0x13810C
|
#define GEN6_GT_GFX_RC6p 0x13810C
|
||||||
#define GEN6_GT_GFX_RC6pp 0x138110
|
#define GEN6_GT_GFX_RC6pp 0x138110
|
||||||
|
#define VLV_RENDER_C0_COUNT_REG 0x138118
|
||||||
|
#define VLV_MEDIA_C0_COUNT_REG 0x13811C
|
||||||
|
|
||||||
#define GEN6_PCODE_MAILBOX 0x138124
|
#define GEN6_PCODE_MAILBOX 0x138124
|
||||||
#define GEN6_PCODE_READY (1<<31)
|
#define GEN6_PCODE_READY (1<<31)
|
||||||
|
|
|
@ -3282,8 +3282,11 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
|
||||||
|
|
||||||
vlv_force_gfx_clock(dev_priv, false);
|
vlv_force_gfx_clock(dev_priv, false);
|
||||||
|
|
||||||
I915_WRITE(GEN6_PMINTRMSK,
|
if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
|
||||||
gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
|
I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
|
||||||
|
else
|
||||||
|
I915_WRITE(GEN6_PMINTRMSK,
|
||||||
|
gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
|
||||||
}
|
}
|
||||||
|
|
||||||
void gen6_rps_idle(struct drm_i915_private *dev_priv)
|
void gen6_rps_idle(struct drm_i915_private *dev_priv)
|
||||||
|
@ -4125,6 +4128,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
|
||||||
I915_WRITE(GEN6_RP_DOWN_EI, 350000);
|
I915_WRITE(GEN6_RP_DOWN_EI, 350000);
|
||||||
|
|
||||||
I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
|
I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
|
||||||
|
I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
|
||||||
|
|
||||||
I915_WRITE(GEN6_RP_CONTROL,
|
I915_WRITE(GEN6_RP_CONTROL,
|
||||||
GEN6_RP_MEDIA_TURBO |
|
GEN6_RP_MEDIA_TURBO |
|
||||||
|
@ -4145,9 +4149,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
|
||||||
|
|
||||||
/* allows RC6 residency counter to work */
|
/* allows RC6 residency counter to work */
|
||||||
I915_WRITE(VLV_COUNTER_CONTROL,
|
I915_WRITE(VLV_COUNTER_CONTROL,
|
||||||
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
|
_MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
|
||||||
|
VLV_RENDER_RC0_COUNT_EN |
|
||||||
VLV_MEDIA_RC6_COUNT_EN |
|
VLV_MEDIA_RC6_COUNT_EN |
|
||||||
VLV_RENDER_RC6_COUNT_EN));
|
VLV_RENDER_RC6_COUNT_EN));
|
||||||
|
|
||||||
if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
|
if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
|
||||||
rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
|
rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue