diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3b05bd189eab..e3377abc0d4d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1331,7 +1331,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct intel_engine_cs *ring; u64 acthd[I915_NUM_RINGS]; u32 seqno[I915_NUM_RINGS]; - int i; + u32 instdone[I915_NUM_INSTDONE_REG]; + int i, j; if (!i915.enable_hangcheck) { seq_printf(m, "Hangcheck disabled\n"); @@ -1345,6 +1346,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) acthd[i] = intel_ring_get_active_head(ring); } + i915_get_extra_instdone(dev, instdone); + intel_runtime_pm_put(dev_priv); if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) { @@ -1365,6 +1368,21 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) (long long)ring->hangcheck.max_acthd); seq_printf(m, "\tscore = %d\n", ring->hangcheck.score); seq_printf(m, "\taction = %d\n", ring->hangcheck.action); + + if (ring->id == RCS) { + seq_puts(m, "\tinstdone read ="); + + for (j = 0; j < I915_NUM_INSTDONE_REG; j++) + seq_printf(m, " 0x%08x", instdone[j]); + + seq_puts(m, "\n\tinstdone accu ="); + + for (j = 0; j < I915_NUM_INSTDONE_REG; j++) + seq_printf(m, " 0x%08x", + ring->hangcheck.instdone[j]); + + seq_puts(m, "\n"); + } } return 0; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index fa8afa7860ae..b775decf9d27 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2949,14 +2949,44 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) ring->hangcheck.deadlock = 0; } -static enum intel_ring_hangcheck_action -ring_stuck(struct intel_engine_cs *ring, u64 acthd) +static bool subunits_stuck(struct intel_engine_cs *ring) { - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - u32 tmp; + u32 instdone[I915_NUM_INSTDONE_REG]; + bool stuck; + int i; + if (ring->id != RCS) + return true; + + i915_get_extra_instdone(ring->dev, instdone); + + /* There might be unstable subunit states even when + * actual head is not moving. Filter out the unstable ones by + * accumulating the undone -> done transitions and only + * consider those as progress. + */ + stuck = true; + for (i = 0; i < I915_NUM_INSTDONE_REG; i++) { + const u32 tmp = instdone[i] | ring->hangcheck.instdone[i]; + + if (tmp != ring->hangcheck.instdone[i]) + stuck = false; + + ring->hangcheck.instdone[i] |= tmp; + } + + return stuck; +} + +static enum intel_ring_hangcheck_action +head_stuck(struct intel_engine_cs *ring, u64 acthd) +{ if (acthd != ring->hangcheck.acthd) { + + /* Clear subunit states on head movement */ + memset(ring->hangcheck.instdone, 0, + sizeof(ring->hangcheck.instdone)); + if (acthd > ring->hangcheck.max_acthd) { ring->hangcheck.max_acthd = acthd; return HANGCHECK_ACTIVE; @@ -2965,6 +2995,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd) return HANGCHECK_ACTIVE_LOOP; } + if (!subunits_stuck(ring)) + return HANGCHECK_ACTIVE; + + return HANGCHECK_HUNG; +} + +static enum intel_ring_hangcheck_action +ring_stuck(struct intel_engine_cs *ring, u64 acthd) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + enum intel_ring_hangcheck_action ha; + u32 tmp; + + ha = head_stuck(ring, acthd); + if (ha != HANGCHECK_HUNG) + return ha; + if (IS_GEN2(dev)) return HANGCHECK_HUNG; @@ -3106,7 +3154,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (ring->hangcheck.score > 0) ring->hangcheck.score--; + /* Clear head and subunit states on seqno movement */ ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0; + + memset(ring->hangcheck.instdone, 0, + sizeof(ring->hangcheck.instdone)); } ring->hangcheck.seqno = seqno; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 49574ffe54bc..7349d9258191 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -93,6 +93,7 @@ struct intel_ring_hangcheck { int score; enum intel_ring_hangcheck_action action; int deadlock; + u32 instdone[I915_NUM_INSTDONE_REG]; }; struct intel_ringbuffer {