diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1723a1f5b20e..9bef6f55f99d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -552,7 +552,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 			seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
 				   engine->name,
 				   i915_gem_request_get_seqno(work->flip_queued_req),
-				   dev_priv->gt.global_timeline.next_seqno,
+				   atomic_read(&dev_priv->gt.global_timeline.next_seqno),
 				   intel_engine_get_seqno(engine),
 				   i915_gem_request_completed(work->flip_queued_req));
 		} else
@@ -1046,7 +1046,7 @@ i915_next_seqno_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
 
-	*val = READ_ONCE(dev_priv->gt.global_timeline.next_seqno);
+	*val = atomic_read(&dev_priv->gt.global_timeline.next_seqno);
 	return 0;
 }
 
@@ -2277,8 +2277,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	struct drm_file *file;
 
 	seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
-	seq_printf(m, "GPU busy? %s [%x]\n",
-		   yesno(dev_priv->gt.awake), dev_priv->gt.active_engines);
+	seq_printf(m, "GPU busy? %s [%d requests]\n",
+		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
 	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
 	seq_printf(m, "Frequency requested %d\n",
 		   intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
@@ -2313,7 +2313,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 
 	if (INTEL_GEN(dev_priv) >= 6 &&
 	    dev_priv->rps.enabled &&
-	    dev_priv->gt.active_engines) {
+	    dev_priv->gt.active_requests) {
 		u32 rpup, rpupei;
 		u32 rpdown, rpdownei;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8b987f772d47..eacb144af29e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2092,6 +2092,7 @@ struct drm_i915_private {
 
 		struct list_head timelines;
 		struct i915_gem_timeline global_timeline;
+		u32 active_requests;
 
 		/**
 		 * Is the GPU currently considered idle, or busy executing
@@ -2100,7 +2101,6 @@ struct drm_i915_private {
 		 * In order to reduce the effect on performance, there
 		 * is a slight delay before we do so.
 		 */
-		unsigned int active_engines;
 		bool awake;
 
 		/**
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4cfb88bd804..8a5d20715e5f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2688,8 +2688,6 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
 		spin_unlock(&engine->execlist_lock);
 	}
-
-	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
@@ -2746,7 +2744,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (!READ_ONCE(dev_priv->gt.awake))
 		return;
 
-	if (READ_ONCE(dev_priv->gt.active_engines))
+	if (READ_ONCE(dev_priv->gt.active_requests))
 		return;
 
 	rearm_hangcheck =
@@ -2760,7 +2758,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 		goto out_rearm;
 	}
 
-	if (dev_priv->gt.active_engines)
+	if (dev_priv->gt.active_requests)
 		goto out_unlock;
 
 	for_each_engine(engine, dev_priv, id)
@@ -4399,6 +4397,7 @@ int i915_gem_suspend(struct drm_device *dev)
 		goto err;
 
 	i915_gem_retire_requests(dev_priv);
+	GEM_BUG_ON(dev_priv->gt.active_requests);
 
 	assert_kernel_context_is_current(dev_priv);
 	i915_gem_context_lost(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9c34a4c540b5..9b22f66464f0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -159,6 +159,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	 */
 	list_del(&request->ring_link);
 	request->ring->last_retired_head = request->postfix;
+	request->i915->gt.active_requests--;
 
 	/* Walk through the active list, calling retire on each. This allows
 	 * objects to track their GPU activity and mark themselves as idle
@@ -253,13 +254,15 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 		return ret;
 
 	i915_gem_retire_requests(i915);
+	GEM_BUG_ON(i915->gt.active_requests > 1);
 
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	if (!i915_seqno_passed(seqno, timeline->next_seqno)) {
+	if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) {
 		while (intel_kick_waiters(i915) || intel_kick_signalers(i915))
 			yield();
 		yield();
 	}
+	atomic_set(&timeline->next_seqno, seqno);
 
 	/* Finally reset hw state */
 	for_each_engine(engine, i915, id)
@@ -279,7 +282,6 @@
 int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
@@ -289,32 +291,31 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 	/* HWS page needs to be set less than what we
 	 * will inject to ring
 	 */
-	ret = i915_gem_init_global_seqno(dev_priv, seqno - 1);
-	if (ret)
-		return ret;
+	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
+}
+
+static int reserve_global_seqno(struct drm_i915_private *i915)
+{
+	u32 active_requests = ++i915->gt.active_requests;
+	u32 next_seqno = atomic_read(&i915->gt.global_timeline.next_seqno);
+	int ret;
+
+	/* Reservation is fine until we need to wrap around */
+	if (likely(next_seqno + active_requests > next_seqno))
+		return 0;
+
+	ret = i915_gem_init_global_seqno(i915, 0);
+	if (ret) {
+		i915->gt.active_requests--;
+		return ret;
+	}
 
-	dev_priv->gt.global_timeline.next_seqno = seqno;
 	return 0;
 }
 
-static int i915_gem_get_global_seqno(struct drm_i915_private *dev_priv,
-				     u32 *seqno)
+static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
 {
-	struct i915_gem_timeline *tl = &dev_priv->gt.global_timeline;
-
-	/* reserve 0 for non-seqno */
-	if (unlikely(tl->next_seqno == 0)) {
-		int ret;
-
-		ret = i915_gem_init_global_seqno(dev_priv, 0);
-		if (ret)
-			return ret;
-
-		tl->next_seqno = 1;
-	}
-
-	*seqno = tl->next_seqno++;
-	return 0;
+	return atomic_inc_return(&tl->next_seqno);
 }
 
 static int __i915_sw_fence_call
@@ -356,9 +357,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct drm_i915_gem_request *req;
-	u32 seqno;
 	int ret;
 
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
 	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 	 * and restart.
@@ -367,6 +369,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		return ERR_PTR(ret);
 
+	ret = reserve_global_seqno(dev_priv);
+	if (ret)
+		return ERR_PTR(ret);
+
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
@@ -402,12 +408,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * Do not use kmem_cache_zalloc() here!
 	 */
 	req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
-
-	ret = i915_gem_get_global_seqno(dev_priv, &seqno);
-	if (ret)
-		goto err;
+	if (!req) {
+		ret = -ENOMEM;
+		goto err_unreserve;
+	}
 
 	req->timeline = engine->timeline;
 
@@ -416,14 +420,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       &i915_fence_ops,
 		       &req->lock,
 		       req->timeline->fence_context,
-		       seqno);
+		       timeline_get_seqno(req->timeline->common));
 
 	i915_sw_fence_init(&req->submit, submit_notify);
 
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
-	req->global_seqno = seqno;
+	req->global_seqno = req->fence.seqno;
 	req->ctx = i915_gem_context_get(ctx);
 
 	/* No zalloc, must clear what we need by hand */
@@ -459,8 +463,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 err_ctx:
 	i915_gem_context_put(ctx);
-err:
 	kmem_cache_free(dev_priv->requests, req);
+err_unreserve:
+	dev_priv->gt.active_requests--;
 	return ERR_PTR(ret);
 }
 
@@ -624,7 +629,6 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
-	dev_priv->gt.active_engines |= intel_engine_flag(engine);
 	if (dev_priv->gt.awake)
 		return;
 
@@ -700,6 +704,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 
+	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
+				     request->fence.seqno));
+
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = timeline->last_pending_seqno;
 	timeline->last_pending_seqno = request->fence.seqno;
@@ -962,38 +969,35 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	return timeout;
 }
 
-static bool engine_retire_requests(struct intel_engine_cs *engine)
+static void engine_retire_requests(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request, *next;
 
 	list_for_each_entry_safe(request, next,
 				 &engine->timeline->requests, link) {
 		if (!i915_gem_request_completed(request))
-			return false;
+			return;
 
 		i915_gem_request_retire(request);
 	}
-
-	return true;
 }
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
-	unsigned int tmp;
+	enum intel_engine_id id;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	if (dev_priv->gt.active_engines == 0)
+	if (!dev_priv->gt.active_requests)
 		return;
 
 	GEM_BUG_ON(!dev_priv->gt.awake);
 
-	for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp)
-		if (engine_retire_requests(engine))
-			dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
+	for_each_engine(engine, dev_priv, id)
+		engine_retire_requests(engine);
 
-	if (dev_priv->gt.active_engines == 0)
+	if (!dev_priv->gt.active_requests)
 		queue_delayed_work(dev_priv->wq,
 				   &dev_priv->gt.idle_work,
 				   msecs_to_jiffies(100));
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 767b23914ec5..18e603980dd9 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -55,7 +55,7 @@ struct intel_timeline {
 
 struct i915_gem_timeline {
 	struct list_head link;
-	u32 next_seqno;
+	atomic_t next_seqno;
 
 	struct drm_i915_private *i915;
 	const char *name;
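
For reference, and not part of the patch itself: reserve_global_seqno() above relies on u32 wrap-around to decide when the seqno space must be reset. Reserving one global seqno per outstanding request stays safe while next_seqno + active_requests does not overflow; once that addition wraps past zero, i915_gem_init_global_seqno(i915, 0) rewinds the timeline before any further requests are allocated. The standalone userspace sketch below illustrates only that wrap check; the helper name seqno_space_exhausted() is invented for the example and does not exist in the driver.

/*
 * Minimal sketch of the overflow test used by reserve_global_seqno().
 * With a 32-bit seqno space, "next_seqno + active_requests > next_seqno"
 * holds as long as handing out one more seqno per reserved request cannot
 * wrap; when the sum wraps past zero, the timeline has to be reset first.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool seqno_space_exhausted(uint32_t next_seqno, uint32_t active_requests)
{
	/* Inverse of the kernel's likely() fast path: true means "must wrap". */
	return next_seqno + active_requests <= next_seqno;
}

int main(void)
{
	/* Plenty of headroom left: no reset needed (prints 0). */
	printf("%d\n", seqno_space_exhausted(1000, 3));
	/* Near the end of the u32 space: the addition wraps (prints 1). */
	printf("%d\n", seqno_space_exhausted(UINT32_MAX - 1, 3));
	return 0;
}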