From ac65bdfef14a902b40ff69a35f5c604dba096547 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 19 Jun 2019 18:01:35 +0100 Subject: [PATCH] drm/i915: Keep rings pinned while the context is active Remember to keep the rings pinned as well as the context image until the GPU is no longer active. v2: Introduce a ring->pin_count primarily to hide the mock_ring that doesn't fit into the normal GGTT vma picture. v3: Order is important in teardown, ringbuffer submission needs to drop the pin count on the engine->kernel_context before it can gleefully free its ring. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110946 Fixes: ce476c80b8bf ("drm/i915: Keep contexts pinned until after the next kernel context switch") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190619170135.15281-1-chris@chris-wilson.co.uk (cherry picked from commit 09c5ab384f6fb30f834a5777888b4486dd7f015d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_context.c | 27 +++++++++++------ drivers/gpu/drm/i915/gt/intel_engine_types.h | 12 ++++++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 10 ++----- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 31 +++++++++++++------- drivers/gpu/drm/i915/gt/mock_engine.c | 1 + 5 files changed, 53 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 2c454f227c2e..23120901c55f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -126,6 +126,7 @@ static void intel_context_retire(struct i915_active *active) if (ce->state) __context_unpin_state(ce->state); + intel_ring_unpin(ce->ring); intel_context_put(ce); } @@ -160,27 +161,35 @@ int intel_context_active_acquire(struct intel_context *ce, unsigned long flags) intel_context_get(ce); + err = intel_ring_pin(ce->ring); + if (err) + goto err_put; + if (!ce->state) return 0; err = __context_pin_state(ce->state, flags); - if (err) { - i915_active_cancel(&ce->active); - intel_context_put(ce); - return err; - } + if (err) + goto err_ring; /* Preallocate tracking nodes */ if (!i915_gem_context_is_kernel(ce->gem_context)) { err = i915_active_acquire_preallocate_barrier(&ce->active, ce->engine); - if (err) { - i915_active_release(&ce->active); - return err; - } + if (err) + goto err_state; } return 0; + +err_state: + __context_unpin_state(ce->state); +err_ring: + intel_ring_unpin(ce->ring); +err_put: + intel_context_put(ce); + i915_active_cancel(&ce->active); + return err; } void intel_context_active_release(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 868b220214f8..43e975a26016 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -70,6 +70,18 @@ struct intel_ring { struct list_head request_list; struct list_head active_link; + /* + * As we have two types of rings, one global to the engine used + * by ringbuffer submission and those that are exclusive to a + * context used by execlists, we have to play safe and allow + * atomic updates to the pin_count. However, the actual pinning + * of the context is either done during initialisation for + * ringbuffer submission or serialised as part of the context + * pinning for execlists, and so we do not need a mutex ourselves + * to serialise intel_ring_pin/intel_ring_unpin. + */ + atomic_t pin_count; + u32 head; u32 tail; u32 emit; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b42b5f158295..82b7ace62d97 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1414,6 +1414,7 @@ static void execlists_context_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); + GEM_BUG_ON(!i915_active_is_idle(&ce->active)); GEM_BUG_ON(intel_context_is_pinned(ce)); if (ce->state) @@ -1426,7 +1427,6 @@ static void execlists_context_unpin(struct intel_context *ce) { i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); - intel_ring_unpin(ce->ring); } static void @@ -1478,13 +1478,9 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = intel_ring_pin(ce->ring); - if (ret) - goto unpin_map; - ret = i915_gem_context_pin_hw_id(ce->gem_context); if (ret) - goto unpin_ring; + goto unpin_map; ce->lrc_desc = lrc_descriptor(ce, engine); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; @@ -1492,8 +1488,6 @@ __execlists_context_pin(struct intel_context *ce, return 0; -unpin_ring: - intel_ring_unpin(ce->ring); unpin_map: i915_gem_object_unpin_map(ce->state->obj); unpin_active: diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index c6023bc9452d..12010e798868 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1149,16 +1149,16 @@ i915_emit_bb_start(struct i915_request *rq, int intel_ring_pin(struct intel_ring *ring) { struct i915_vma *vma = ring->vma; - enum i915_map_type map = i915_coherent_map_type(vma->vm->i915); unsigned int flags; void *addr; int ret; - GEM_BUG_ON(ring->vaddr); + if (atomic_fetch_inc(&ring->pin_count)) + return 0; ret = i915_timeline_pin(ring->timeline); if (ret) - return ret; + goto err_unpin; flags = PIN_GLOBAL; @@ -1172,26 +1172,31 @@ int intel_ring_pin(struct intel_ring *ring) ret = i915_vma_pin(vma, 0, 0, flags); if (unlikely(ret)) - goto unpin_timeline; + goto err_timeline; if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else - addr = i915_gem_object_pin_map(vma->obj, map); + addr = i915_gem_object_pin_map(vma->obj, + i915_coherent_map_type(vma->vm->i915)); if (IS_ERR(addr)) { ret = PTR_ERR(addr); - goto unpin_ring; + goto err_ring; } vma->obj->pin_global++; + GEM_BUG_ON(ring->vaddr); ring->vaddr = addr; + return 0; -unpin_ring: +err_ring: i915_vma_unpin(vma); -unpin_timeline: +err_timeline: i915_timeline_unpin(ring->timeline); +err_unpin: + atomic_dec(&ring->pin_count); return ret; } @@ -1207,16 +1212,19 @@ void intel_ring_reset(struct intel_ring *ring, u32 tail) void intel_ring_unpin(struct intel_ring *ring) { - GEM_BUG_ON(!ring->vma); - GEM_BUG_ON(!ring->vaddr); + if (!atomic_dec_and_test(&ring->pin_count)) + return; /* Discard any unused bytes beyond that submitted to hw. */ intel_ring_reset(ring, ring->tail); + GEM_BUG_ON(!ring->vma); if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); else i915_gem_object_unpin_map(ring->vma->obj); + + GEM_BUG_ON(!ring->vaddr); ring->vaddr = NULL; ring->vma->obj->pin_global--; @@ -2081,10 +2089,11 @@ static void ring_destroy(struct intel_engine_cs *engine) WARN_ON(INTEL_GEN(dev_priv) > 2 && (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + intel_engine_cleanup_common(engine); + intel_ring_unpin(engine->buffer); intel_ring_put(engine->buffer); - intel_engine_cleanup_common(engine); kfree(engine); } diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 086801b51441..486c6953dcb1 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -66,6 +66,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->base.effective_size = sz; ring->base.vaddr = (void *)(ring + 1); ring->base.timeline = &ring->timeline; + atomic_set(&ring->base.pin_count, 1); INIT_LIST_HEAD(&ring->base.request_list); intel_ring_update_space(&ring->base);