diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ebbf4e400684..919c05ba9932 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -598,7 +598,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) engine->name, i915_gem_request_get_seqno(work->flip_queued_req), dev_priv->next_seqno, - engine->get_seqno(engine, true), + engine->get_seqno(engine), i915_gem_request_completed(work->flip_queued_req, true)); } else seq_printf(m, "Flip not associated with any ring\n"); @@ -730,7 +730,7 @@ static void i915_ring_seqno_info(struct seq_file *m, { if (engine->get_seqno) { seq_printf(m, "Current sequence (%s): %x\n", - engine->name, engine->get_seqno(engine, false)); + engine->name, engine->get_seqno(engine)); } } @@ -1346,8 +1346,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); for_each_engine_id(engine, dev_priv, id) { - seqno[id] = engine->get_seqno(engine, false); acthd[id] = intel_ring_get_active_head(engine); + seqno[id] = engine->get_seqno(engine); } i915_get_extra_instdone(dev, instdone); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a93e5dd4fa9a..542401659013 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3017,15 +3017,19 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2) static inline bool i915_gem_request_started(struct drm_i915_gem_request *req, bool lazy_coherency) { - u32 seqno = req->engine->get_seqno(req->engine, lazy_coherency); - return i915_seqno_passed(seqno, req->previous_seqno); + if (!lazy_coherency && req->engine->irq_seqno_barrier) + req->engine->irq_seqno_barrier(req->engine); + return i915_seqno_passed(req->engine->get_seqno(req->engine), + req->previous_seqno); } static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, bool lazy_coherency) { - u32 seqno = req->engine->get_seqno(req->engine, lazy_coherency); - return i915_seqno_passed(seqno, req->seqno); + if (!lazy_coherency && req->engine->irq_seqno_barrier) + req->engine->irq_seqno_barrier(req->engine); + return i915_seqno_passed(req->engine->get_seqno(req->engine), + req->seqno); } int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index ce77713a555d..89725c9efc25 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -931,8 +931,8 @@ static void i915_record_ring_state(struct drm_device *dev, ering->waiting = waitqueue_active(&engine->irq_queue); ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); - ering->seqno = engine->get_seqno(engine, false); ering->acthd = intel_ring_get_active_head(engine); + ering->seqno = engine->get_seqno(engine); ering->last_seqno = engine->last_submitted_seqno; ering->start = I915_READ_START(engine); ering->head = I915_READ_HEAD(engine); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c30a12ef6daf..3b946e1c7614 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2941,7 +2941,7 @@ static int semaphore_passed(struct intel_engine_cs *engine) if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) return -1; - if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno)) + if (i915_seqno_passed(signaller->get_seqno(signaller), seqno)) return 1; /* cursory check for an unkickable deadlock */ @@ -3100,8 +3100,18 @@ static void i915_hangcheck_elapsed(struct work_struct *work) semaphore_clear_deadlocks(dev_priv); - seqno = engine->get_seqno(engine, false); + /* We don't strictly need an irq-barrier here, as we are not + * serving an interrupt request, be paranoid in case the + * barrier has side-effects (such as preventing a broken + * cacheline snoop) and so be sure that we can see the seqno + * advance. If the seqno should stick, due to a stale + * cacheline, we would erroneously declare the GPU hung. + */ + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + acthd = intel_ring_get_active_head(engine); + seqno = engine->get_seqno(engine); if (engine->hangcheck.seqno == seqno) { if (ring_idle(engine, seqno)) { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index afdd8aefb5b7..dc0def210097 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -562,7 +562,7 @@ TRACE_EVENT(i915_gem_request_notify, TP_fast_assign( __entry->dev = engine->dev->primary->index; __entry->ring = engine->id; - __entry->seqno = engine->get_seqno(engine, false); + __entry->seqno = engine->get_seqno(engine); ), TP_printk("dev=%u, ring=%u, seqno=%u", diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 67d48d759b46..f209ecfdcb5c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1852,7 +1852,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, return 0; } -static u32 gen8_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency) +static u32 gen8_get_seqno(struct intel_engine_cs *engine) { return intel_read_status_page(engine, I915_GEM_HWS_INDEX); } @@ -1862,10 +1862,8 @@ static void gen8_set_seqno(struct intel_engine_cs *engine, u32 seqno) intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); } -static u32 bxt_a_get_seqno(struct intel_engine_cs *engine, - bool lazy_coherency) +static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) { - /* * On BXT A steppings there is a HW coherency issue whereby the * MI_STORE_DATA_IMM storing the completed request's seqno @@ -1876,11 +1874,7 @@ static u32 bxt_a_get_seqno(struct intel_engine_cs *engine, * bxt_a_set_seqno(), where we also do a clflush after the write. So * this clflush in practice becomes an invalidate operation. */ - - if (!lazy_coherency) - intel_flush_status_page(engine, I915_GEM_HWS_INDEX); - - return intel_read_status_page(engine, I915_GEM_HWS_INDEX); + intel_flush_status_page(engine, I915_GEM_HWS_INDEX); } static void bxt_a_set_seqno(struct intel_engine_cs *engine, u32 seqno) @@ -2058,12 +2052,11 @@ logical_ring_default_vfuncs(struct drm_device *dev, engine->irq_get = gen8_logical_ring_get_irq; engine->irq_put = gen8_logical_ring_put_irq; engine->emit_bb_start = gen8_emit_bb_start; + engine->get_seqno = gen8_get_seqno; + engine->set_seqno = gen8_set_seqno; if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) { - engine->get_seqno = bxt_a_get_seqno; + engine->irq_seqno_barrier = bxt_a_seqno_barrier; engine->set_seqno = bxt_a_set_seqno; - } else { - engine->get_seqno = gen8_get_seqno; - engine->set_seqno = gen8_set_seqno; } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 69cc3bc20495..e144f4f301bf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1568,8 +1568,8 @@ pc_render_add_request(struct drm_i915_gem_request *req) return 0; } -static u32 -gen6_ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency) +static void +gen6_seqno_barrier(struct intel_engine_cs *engine) { /* Workaround to force correct ordering between irq and seqno writes on * ivb (and maybe also on snb) by reading from a CS register (like @@ -1583,16 +1583,12 @@ gen6_ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency) * batch i.e. much more frequent than a delay when waiting for the * interrupt (with the same net latency). */ - if (!lazy_coherency) { - struct drm_i915_private *dev_priv = engine->dev->dev_private; - POSTING_READ_FW(RING_ACTHD(engine->mmio_base)); - } - - return intel_read_status_page(engine, I915_GEM_HWS_INDEX); + struct drm_i915_private *dev_priv = engine->dev->dev_private; + POSTING_READ_FW(RING_ACTHD(engine->mmio_base)); } static u32 -ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency) +ring_get_seqno(struct intel_engine_cs *engine) { return intel_read_status_page(engine, I915_GEM_HWS_INDEX); } @@ -1604,7 +1600,7 @@ ring_set_seqno(struct intel_engine_cs *engine, u32 seqno) } static u32 -pc_render_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency) +pc_render_get_seqno(struct intel_engine_cs *engine) { return engine->scratch.cpu_page[0]; } @@ -2828,7 +2824,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->irq_get = gen8_ring_get_irq; engine->irq_put = gen8_ring_put_irq; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - engine->get_seqno = gen6_ring_get_seqno; + engine->irq_seqno_barrier = gen6_seqno_barrier; + engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; if (i915_semaphore_is_enabled(dev)) { WARN_ON(!dev_priv->semaphore_obj); @@ -2845,7 +2842,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) engine->irq_get = gen6_ring_get_irq; engine->irq_put = gen6_ring_put_irq; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - engine->get_seqno = gen6_ring_get_seqno; + engine->irq_seqno_barrier = gen6_seqno_barrier; + engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; if (i915_semaphore_is_enabled(dev)) { engine->semaphore.sync_to = gen6_ring_sync; @@ -2960,7 +2958,8 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) engine->write_tail = gen6_bsd_ring_write_tail; engine->flush = gen6_bsd_ring_flush; engine->add_request = gen6_add_request; - engine->get_seqno = gen6_ring_get_seqno; + engine->irq_seqno_barrier = gen6_seqno_barrier; + engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; if (INTEL_INFO(dev)->gen >= 8) { engine->irq_enable_mask = @@ -3033,7 +3032,8 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev) engine->mmio_base = GEN8_BSD2_RING_BASE; engine->flush = gen6_bsd_ring_flush; engine->add_request = gen6_add_request; - engine->get_seqno = gen6_ring_get_seqno; + engine->irq_seqno_barrier = gen6_seqno_barrier; + engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; @@ -3064,7 +3064,8 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) engine->write_tail = ring_write_tail; engine->flush = gen6_ring_flush; engine->add_request = gen6_add_request; - engine->get_seqno = gen6_ring_get_seqno; + engine->irq_seqno_barrier = gen6_seqno_barrier; + engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; if (INTEL_INFO(dev)->gen >= 8) { engine->irq_enable_mask = @@ -3122,7 +3123,8 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) engine->write_tail = ring_write_tail; engine->flush = gen6_ring_flush; engine->add_request = gen6_add_request; - engine->get_seqno = gen6_ring_get_seqno; + engine->irq_seqno_barrier = gen6_seqno_barrier; + engine->get_seqno = ring_get_seqno; engine->set_seqno = ring_set_seqno; if (INTEL_INFO(dev)->gen >= 8) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 98eadfa79116..3f04906a081f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -193,8 +193,8 @@ struct intel_engine_cs { * seen value is good enough. Note that the seqno will always be * monotonic, even if not coherent. */ - u32 (*get_seqno)(struct intel_engine_cs *ring, - bool lazy_coherency); + void (*irq_seqno_barrier)(struct intel_engine_cs *ring); + u32 (*get_seqno)(struct intel_engine_cs *ring); void (*set_seqno)(struct intel_engine_cs *ring, u32 seqno); int (*dispatch_execbuffer)(struct drm_i915_gem_request *req,