drm/i915: Perform object clflushing asynchronously
Flushing the cachelines for an object is slow, can be as much as 100ms for a large framebuffer. We currently do this under the struct_mutex BKL on execution or on pageflip. But now with the ability to add fences to obj->resv for both flips and execbuf (and we naturally wait on the fence before CPU access), we can move the clflush operation to a workqueue and signal a fence for completion, thereby doing the work asynchronously and not blocking the driver or its clients. v2: Introduce i915_gem_clflush.h and use a new name, split out some extras into separate patches. Suggested-by: Akash Goel <akash.goel@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-5-chris@chris-wilson.co.uk
This commit is contained in:
parent
f6aaba4dfb
commit
57822dc6b9
|
@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
|
|||
# GEM code
|
||||
i915-y += i915_cmd_parser.o \
|
||||
i915_gem_batch_pool.o \
|
||||
i915_gem_clflush.o \
|
||||
i915_gem_context.o \
|
||||
i915_gem_dmabuf.o \
|
||||
i915_gem_evict.o \
|
||||
|
|
|
@ -3383,7 +3383,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
|
|||
void i915_gem_reset(struct drm_i915_private *dev_priv);
|
||||
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
|
||||
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
|
||||
void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
|
||||
|
||||
void i915_gem_init_mmio(struct drm_i915_private *i915);
|
||||
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
|
||||
int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <drm/drm_vma_manager.h>
|
||||
#include <drm/i915_drm.h>
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_clflush.h"
|
||||
#include "i915_vgpu.h"
|
||||
#include "i915_trace.h"
|
||||
#include "intel_drv.h"
|
||||
|
@ -3133,46 +3134,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
|
||||
bool force)
|
||||
{
|
||||
/* If we don't have a page list set up, then we're not pinned
|
||||
* to GPU, and we can ignore the cache flush because it'll happen
|
||||
* again at bind time.
|
||||
*/
|
||||
if (!obj->mm.pages) {
|
||||
GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stolen memory is always coherent with the GPU as it is explicitly
|
||||
* marked as wc by the system, or the system is cache-coherent.
|
||||
* Similarly, we only access struct pages through the CPU cache, so
|
||||
* anything not backed by physical memory we consider to be always
|
||||
* coherent and not need clflushing.
|
||||
*/
|
||||
if (!i915_gem_object_has_struct_page(obj))
|
||||
return;
|
||||
|
||||
/* If the GPU is snooping the contents of the CPU cache,
|
||||
* we do not need to manually clear the CPU cache lines. However,
|
||||
* the caches are only snooped when the render cache is
|
||||
* flushed/invalidated. As we always have to emit invalidations
|
||||
* and flushes when moving into and out of the RENDER domain, correct
|
||||
* snooping behaviour occurs naturally as the result of our domain
|
||||
* tracking.
|
||||
*/
|
||||
if (!force && i915_gem_object_is_coherent(obj)) {
|
||||
obj->cache_dirty = true;
|
||||
return;
|
||||
}
|
||||
|
||||
trace_i915_gem_object_clflush(obj);
|
||||
drm_clflush_sg(obj->mm.pages);
|
||||
obj->cache_dirty = false;
|
||||
}
|
||||
|
||||
/** Flushes the GTT write domain for the object if it's dirty. */
|
||||
static void
|
||||
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
|
||||
|
@ -3213,9 +3174,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
|
|||
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
|
||||
return;
|
||||
|
||||
i915_gem_clflush_object(obj, obj->pin_display);
|
||||
intel_fb_obj_flush(obj, false, ORIGIN_CPU);
|
||||
|
||||
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
|
||||
obj->base.write_domain = 0;
|
||||
}
|
||||
|
||||
|
@ -3224,9 +3183,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
|
|||
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
|
||||
return;
|
||||
|
||||
i915_gem_clflush_object(obj, true);
|
||||
intel_fb_obj_flush(obj, false, ORIGIN_CPU);
|
||||
|
||||
i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
|
||||
obj->base.write_domain = 0;
|
||||
}
|
||||
|
||||
|
@ -3657,8 +3614,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
|
|||
|
||||
/* Flush the CPU cache if it's still invalid. */
|
||||
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
|
||||
i915_gem_clflush_object(obj, false);
|
||||
|
||||
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
|
||||
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
|
||||
}
|
||||
|
||||
|
@ -4526,6 +4482,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
|
|||
|
||||
mutex_lock(&dev_priv->drm.struct_mutex);
|
||||
|
||||
i915_gem_clflush_init(dev_priv);
|
||||
|
||||
if (!i915.enable_execlists) {
|
||||
dev_priv->gt.resume = intel_legacy_submission_resume;
|
||||
dev_priv->gt.cleanup_engine = intel_engine_cleanup;
|
||||
|
|
|
@ -0,0 +1,189 @@
|
|||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "intel_frontbuffer.h"
|
||||
#include "i915_gem_clflush.h"
|
||||
|
||||
static DEFINE_SPINLOCK(clflush_lock);
|
||||
static u64 clflush_context;
|
||||
|
||||
struct clflush {
|
||||
struct dma_fence dma; /* Must be first for dma_fence_free() */
|
||||
struct i915_sw_fence wait;
|
||||
struct work_struct work;
|
||||
struct drm_i915_gem_object *obj;
|
||||
};
|
||||
|
||||
static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
|
||||
{
|
||||
return DRIVER_NAME;
|
||||
}
|
||||
|
||||
static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
|
||||
{
|
||||
return "clflush";
|
||||
}
|
||||
|
||||
static bool i915_clflush_enable_signaling(struct dma_fence *fence)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void i915_clflush_release(struct dma_fence *fence)
|
||||
{
|
||||
struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
|
||||
|
||||
i915_sw_fence_fini(&clflush->wait);
|
||||
|
||||
BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
|
||||
dma_fence_free(&clflush->dma);
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops i915_clflush_ops = {
|
||||
.get_driver_name = i915_clflush_get_driver_name,
|
||||
.get_timeline_name = i915_clflush_get_timeline_name,
|
||||
.enable_signaling = i915_clflush_enable_signaling,
|
||||
.wait = dma_fence_default_wait,
|
||||
.release = i915_clflush_release,
|
||||
};
|
||||
|
||||
static void __i915_do_clflush(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
drm_clflush_sg(obj->mm.pages);
|
||||
obj->cache_dirty = false;
|
||||
|
||||
intel_fb_obj_flush(obj, false, ORIGIN_CPU);
|
||||
}
|
||||
|
||||
static void i915_clflush_work(struct work_struct *work)
|
||||
{
|
||||
struct clflush *clflush = container_of(work, typeof(*clflush), work);
|
||||
struct drm_i915_gem_object *obj = clflush->obj;
|
||||
|
||||
if (!obj->cache_dirty)
|
||||
goto out;
|
||||
|
||||
if (i915_gem_object_pin_pages(obj)) {
|
||||
DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
__i915_do_clflush(obj);
|
||||
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
|
||||
out:
|
||||
i915_gem_object_put(obj);
|
||||
|
||||
dma_fence_signal(&clflush->dma);
|
||||
dma_fence_put(&clflush->dma);
|
||||
}
|
||||
|
||||
static int __i915_sw_fence_call
|
||||
i915_clflush_notify(struct i915_sw_fence *fence,
|
||||
enum i915_sw_fence_notify state)
|
||||
{
|
||||
struct clflush *clflush = container_of(fence, typeof(*clflush), wait);
|
||||
|
||||
switch (state) {
|
||||
case FENCE_COMPLETE:
|
||||
schedule_work(&clflush->work);
|
||||
break;
|
||||
|
||||
case FENCE_FREE:
|
||||
dma_fence_put(&clflush->dma);
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct clflush *clflush;
|
||||
|
||||
/*
|
||||
* Stolen memory is always coherent with the GPU as it is explicitly
|
||||
* marked as wc by the system, or the system is cache-coherent.
|
||||
* Similarly, we only access struct pages through the CPU cache, so
|
||||
* anything not backed by physical memory we consider to be always
|
||||
* coherent and not need clflushing.
|
||||
*/
|
||||
if (!i915_gem_object_has_struct_page(obj))
|
||||
return;
|
||||
|
||||
obj->cache_dirty = true;
|
||||
|
||||
/* If the GPU is snooping the contents of the CPU cache,
|
||||
* we do not need to manually clear the CPU cache lines. However,
|
||||
* the caches are only snooped when the render cache is
|
||||
* flushed/invalidated. As we always have to emit invalidations
|
||||
* and flushes when moving into and out of the RENDER domain, correct
|
||||
* snooping behaviour occurs naturally as the result of our domain
|
||||
* tracking.
|
||||
*/
|
||||
if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
|
||||
return;
|
||||
|
||||
trace_i915_gem_object_clflush(obj);
|
||||
|
||||
clflush = NULL;
|
||||
if (!(flags & I915_CLFLUSH_SYNC))
|
||||
clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
|
||||
if (clflush) {
|
||||
dma_fence_init(&clflush->dma,
|
||||
&i915_clflush_ops,
|
||||
&clflush_lock,
|
||||
clflush_context,
|
||||
0);
|
||||
i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
|
||||
|
||||
clflush->obj = i915_gem_object_get(obj);
|
||||
INIT_WORK(&clflush->work, i915_clflush_work);
|
||||
|
||||
dma_fence_get(&clflush->dma);
|
||||
|
||||
i915_sw_fence_await_reservation(&clflush->wait,
|
||||
obj->resv, NULL,
|
||||
false, I915_FENCE_TIMEOUT,
|
||||
GFP_KERNEL);
|
||||
|
||||
reservation_object_lock(obj->resv, NULL);
|
||||
reservation_object_add_excl_fence(obj->resv, &clflush->dma);
|
||||
reservation_object_unlock(obj->resv);
|
||||
|
||||
i915_sw_fence_commit(&clflush->wait);
|
||||
} else if (obj->mm.pages) {
|
||||
__i915_do_clflush(obj);
|
||||
} else {
|
||||
GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
|
||||
}
|
||||
}
|
||||
|
||||
void i915_gem_clflush_init(struct drm_i915_private *i915)
|
||||
{
|
||||
clflush_context = dma_fence_context_alloc(1);
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __I915_GEM_CLFLUSH_H__
|
||||
#define __I915_GEM_CLFLUSH_H__
|
||||
|
||||
struct drm_i915_private;
|
||||
struct drm_i915_gem_object;
|
||||
|
||||
void i915_gem_clflush_init(struct drm_i915_private *i915);
|
||||
void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
|
||||
unsigned int flags);
|
||||
#define I915_CLFLUSH_FORCE BIT(0)
|
||||
#define I915_CLFLUSH_SYNC BIT(1)
|
||||
|
||||
#endif /* __I915_GEM_CLFLUSH_H__ */
|
|
@ -35,6 +35,7 @@
|
|||
#include <drm/i915_drm.h>
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_clflush.h"
|
||||
#include "i915_trace.h"
|
||||
#include "intel_drv.h"
|
||||
#include "intel_frontbuffer.h"
|
||||
|
@ -1114,13 +1115,15 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
|
|||
if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
|
||||
continue;
|
||||
|
||||
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
|
||||
i915_gem_clflush_object(obj, 0);
|
||||
obj->base.write_domain = 0;
|
||||
}
|
||||
|
||||
ret = i915_gem_request_await_object
|
||||
(req, obj, obj->base.pending_write_domain);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
|
||||
i915_gem_clflush_object(obj, false);
|
||||
}
|
||||
|
||||
/* Unconditionally flush any chipset caches (for streaming writes). */
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include "intel_frontbuffer.h"
|
||||
#include <drm/i915_drm.h>
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_clflush.h"
|
||||
#include "intel_dsi.h"
|
||||
#include "i915_trace.h"
|
||||
#include <drm/drm_atomic.h>
|
||||
|
@ -13188,6 +13189,29 @@ intel_prepare_plane_fb(struct drm_plane *plane,
|
|||
struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
|
||||
int ret;
|
||||
|
||||
if (obj) {
|
||||
if (plane->type == DRM_PLANE_TYPE_CURSOR &&
|
||||
INTEL_INFO(dev_priv)->cursor_needs_physical) {
|
||||
const int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
|
||||
|
||||
ret = i915_gem_object_attach_phys(obj, align);
|
||||
if (ret) {
|
||||
DRM_DEBUG_KMS("failed to attach phys object\n");
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
struct i915_vma *vma;
|
||||
|
||||
vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
|
||||
if (IS_ERR(vma)) {
|
||||
DRM_DEBUG_KMS("failed to pin object\n");
|
||||
return PTR_ERR(vma);
|
||||
}
|
||||
|
||||
to_intel_plane_state(new_state)->vma = vma;
|
||||
}
|
||||
}
|
||||
|
||||
if (!obj && !old_obj)
|
||||
return 0;
|
||||
|
||||
|
@ -13240,26 +13264,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
|
|||
i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
|
||||
}
|
||||
|
||||
if (plane->type == DRM_PLANE_TYPE_CURSOR &&
|
||||
INTEL_INFO(dev_priv)->cursor_needs_physical) {
|
||||
int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
|
||||
ret = i915_gem_object_attach_phys(obj, align);
|
||||
if (ret) {
|
||||
DRM_DEBUG_KMS("failed to attach phys object\n");
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
struct i915_vma *vma;
|
||||
|
||||
vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
|
||||
if (IS_ERR(vma)) {
|
||||
DRM_DEBUG_KMS("failed to pin object\n");
|
||||
return PTR_ERR(vma);
|
||||
}
|
||||
|
||||
to_intel_plane_state(new_state)->vma = vma;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue