mirror of https://gitee.com/openkylin/linux.git
drm/i915: Restore context and pd for ringbuffer submission after reset
Following a reset, the context and page directory registers are lost.
However, the queue of requests that we resubmit after the reset may
depend upon them - the registers are restored from a context image, but
that restore may be inhibited and may simply be absent from the request
if it was in the middle of a sequence using the same context. If we
prime the CCID/PD registers with the first request in the queue (even
for the hung request), we prevent invalid memory access for the
following requests (and continually hung engines).
v2: Magic BIT(8), reserved for future use but still appears unused.
v3: Some commentary on handling innocent vs guilty requests
v4: Add a wait for PD_BASE fetch. The reload appears to be instant on my
Ivybridge, but this bit probably exists for a reason.
Fixes: 821ed7df6e
("drm/i915: Update reset path to fix incomplete requests")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170207152437.4252-1-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
This commit is contained in:
parent
6248017ae5
commit
c0dcb203fb
|
@ -2745,21 +2745,17 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
|
|||
engine->irq_seqno_barrier(engine);
|
||||
|
||||
request = i915_gem_find_active_request(engine);
|
||||
if (!request)
|
||||
return;
|
||||
if (request && i915_gem_reset_request(request)) {
|
||||
DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
|
||||
engine->name, request->global_seqno);
|
||||
|
||||
if (!i915_gem_reset_request(request))
|
||||
return;
|
||||
|
||||
DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
|
||||
engine->name, request->global_seqno);
|
||||
/* If this context is now banned, skip all pending requests. */
|
||||
if (i915_gem_context_is_banned(request->ctx))
|
||||
engine_skip_context(request);
|
||||
}
|
||||
|
||||
/* Setup the CS to resume from the breadcrumb of the hung request */
|
||||
engine->reset_hw(engine, request);
|
||||
|
||||
/* If this context is now banned, skip all of its pending requests. */
|
||||
if (i915_gem_context_is_banned(request->ctx))
|
||||
engine_skip_context(request);
|
||||
}
|
||||
|
||||
void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
|
||||
|
|
|
@ -3307,8 +3307,10 @@ enum skl_disp_power_wells {
|
|||
/*
|
||||
* Logical Context regs
|
||||
*/
|
||||
#define CCID _MMIO(0x2180)
|
||||
#define CCID_EN (1<<0)
|
||||
#define CCID _MMIO(0x2180)
|
||||
#define CCID_EN BIT(0)
|
||||
#define CCID_EXTENDED_STATE_RESTORE BIT(2)
|
||||
#define CCID_EXTENDED_STATE_SAVE BIT(3)
|
||||
/*
|
||||
* Notes on SNB/IVB/VLV context size:
|
||||
* - Power context is saved elsewhere (LLC or stolen)
|
||||
|
|
|
@ -1352,7 +1352,20 @@ static void reset_common_ring(struct intel_engine_cs *engine,
|
|||
struct drm_i915_gem_request *request)
|
||||
{
|
||||
struct execlist_port *port = engine->execlist_port;
|
||||
struct intel_context *ce = &request->ctx->engine[engine->id];
|
||||
struct intel_context *ce;
|
||||
|
||||
/* If the request was innocent, we leave the request in the ELSP
|
||||
* and will try to replay it on restarting. The context image may
|
||||
* have been corrupted by the reset, in which case we may have
|
||||
* to service a new GPU hang, but more likely we can continue on
|
||||
* without impact.
|
||||
*
|
||||
* If the request was guilty, we presume the context is corrupt
|
||||
* and have to at least restore the RING register in the context
|
||||
* image back to the expected values to skip over the guilty request.
|
||||
*/
|
||||
if (!request || request->fence.error != -EIO)
|
||||
return;
|
||||
|
||||
/* We want a simple context + ring to execute the breadcrumb update.
|
||||
* We cannot rely on the context being intact across the GPU hang,
|
||||
|
@ -1361,6 +1374,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
|
|||
* future request will be after userspace has had the opportunity
|
||||
* to recreate its own state.
|
||||
*/
|
||||
ce = &request->ctx->engine[engine->id];
|
||||
execlists_init_reg_state(ce->lrc_reg_state,
|
||||
request->ctx, engine, ce->ring);
|
||||
|
||||
|
|
|
@ -599,10 +599,62 @@ static int init_ring_common(struct intel_engine_cs *engine)
|
|||
static void reset_ring_common(struct intel_engine_cs *engine,
|
||||
struct drm_i915_gem_request *request)
|
||||
{
|
||||
struct intel_ring *ring = request->ring;
|
||||
/* Try to restore the logical GPU state to match the continuation
|
||||
* of the request queue. If we skip the context/PD restore, then
|
||||
* the next request may try to execute assuming that its context
|
||||
* is valid and loaded on the GPU and so may try to access invalid
|
||||
* memory, prompting repeated GPU hangs.
|
||||
*
|
||||
* If the request was guilty, we still restore the logical state
|
||||
* in case the next request requires it (e.g. the aliasing ppgtt),
|
||||
* but skip over the hung batch.
|
||||
*
|
||||
* If the request was innocent, we try to replay the request with
|
||||
* the restored context.
|
||||
*/
|
||||
if (request) {
|
||||
struct drm_i915_private *dev_priv = request->i915;
|
||||
struct intel_context *ce = &request->ctx->engine[engine->id];
|
||||
struct i915_hw_ppgtt *ppgtt;
|
||||
|
||||
ring->head = request->postfix;
|
||||
ring->last_retired_head = -1;
|
||||
/* FIXME consider gen8 reset */
|
||||
|
||||
if (ce->state) {
|
||||
I915_WRITE(CCID,
|
||||
i915_ggtt_offset(ce->state) |
|
||||
BIT(8) /* must be set! */ |
|
||||
CCID_EXTENDED_STATE_SAVE |
|
||||
CCID_EXTENDED_STATE_RESTORE |
|
||||
CCID_EN);
|
||||
}
|
||||
|
||||
ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
|
||||
if (ppgtt) {
|
||||
u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;
|
||||
|
||||
I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
|
||||
I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);
|
||||
|
||||
/* Wait for the PD reload to complete */
|
||||
if (intel_wait_for_register(dev_priv,
|
||||
RING_PP_DIR_BASE(engine),
|
||||
BIT(0), 0,
|
||||
10))
|
||||
DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n");
|
||||
|
||||
ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
|
||||
}
|
||||
|
||||
/* If the rq hung, jump to its breadcrumb and skip the batch */
|
||||
if (request->fence.error == -EIO) {
|
||||
struct intel_ring *ring = request->ring;
|
||||
|
||||
ring->head = request->postfix;
|
||||
ring->last_retired_head = -1;
|
||||
}
|
||||
} else {
|
||||
engine->legacy_active_context = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
|
||||
|
|
Loading…
Reference in New Issue