diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 34f67eb9bfa1..d42a9d6767d4 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3649,7 +3649,26 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) static void execlists_sanitize(struct intel_engine_cs *engine) { + /* + * Poison residual state on resume, in case the suspend didn't! + * + * We have to assume that across suspend/resume (or other loss + * of control) that the contents of our pinned buffers has been + * lost, replaced by garbage. Since this doesn't always happen, + * let's poison such state so that we more quickly spot when + * we falsely assume it has been preserved. + */ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); + reset_csb_pointers(engine); + + /* + * The kernel_context HWSP is stored in the status_page. As above, + * that may be lost on resume/initialisation, and so we need to + * reset the value in the HWSP. + */ + intel_timeline_reset_seqno(engine->kernel_context->timeline); } static void enable_error_interrupt(struct intel_engine_cs *engine) @@ -4539,6 +4558,8 @@ static void execlists_shutdown(struct intel_engine_cs *engine) static void execlists_release(struct intel_engine_cs *engine) { + engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ + execlists_shutdown(engine); intel_engine_cleanup_common(engine); @@ -4550,7 +4571,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ - engine->sanitize = execlists_sanitize; engine->resume = execlists_resume; engine->cops = &execlists_context_ops; @@ -4666,6 +4686,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) execlists->csb_size = GEN11_CSB_ENTRIES; /* Finally, take ownership and responsibility for cleanup! */ + engine->sanitize = execlists_sanitize; engine->release = execlists_release; return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 3779c2ae0d65..29a39e44fa36 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -337,6 +337,13 @@ int intel_timeline_pin(struct intel_timeline *tl) return 0; } +void intel_timeline_reset_seqno(const struct intel_timeline *tl) +{ + /* Must be pinned to be writable, and no requests in flight. */ + GEM_BUG_ON(!atomic_read(&tl->pin_count)); + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); +} + void intel_timeline_enter(struct intel_timeline *tl) { struct intel_gt_timelines *timelines = &tl->gt->timelines; @@ -365,8 +372,16 @@ void intel_timeline_enter(struct intel_timeline *tl) return; spin_lock(&timelines->lock); - if (!atomic_fetch_inc(&tl->active_count)) + if (!atomic_fetch_inc(&tl->active_count)) { + /* + * The HWSP is volatile, and may have been lost while inactive, + * e.g. across suspend/resume. Be paranoid, and ensure that + * the HWSP value matches our seqno so we don't proclaim + * the next request as already complete. + */ + intel_timeline_reset_seqno(tl); list_add_tail(&tl->link, &timelines->active_list); + } spin_unlock(&timelines->lock); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index f5b7eade3809..c8e59a333182 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -84,6 +84,8 @@ int intel_timeline_get_seqno(struct intel_timeline *tl, void intel_timeline_exit(struct intel_timeline *tl); void intel_timeline_unpin(struct intel_timeline *tl); +void intel_timeline_reset_seqno(const struct intel_timeline *tl); + int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *until, u32 *hwsp_offset);