drm/i915: Start passing around i915_vma from execbuffer

During execbuffer we look up the i915_vma in order to reserve them in
the VM. However, we then do a double lookup of the vma in order to then
pin them, all because we lack the necessary interfaces to operate on
i915_vma - so introduce i915_vma_pin()!

v2: Tidy parameter lists to remove one level of redirection in the hot
path.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470324762-2545-15-git-send-email-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson 2016-08-04 16:32:31 +01:00
parent 20dfbde463
commit 59bfa1248e
5 changed files with 148 additions and 206 deletions

View File

@ -3018,23 +3018,6 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
void i915_gem_free_object(struct drm_gem_object *obj);
/* Flags used by pin/bind&friends. */
#define PIN_MAPPABLE (1<<0)
#define PIN_NONBLOCK (1<<1)
#define PIN_GLOBAL (1<<2)
#define PIN_OFFSET_BIAS (1<<3)
#define PIN_USER (1<<4)
#define PIN_UPDATE (1<<5)
#define PIN_ZONE_4G (1<<6)
#define PIN_HIGH (1<<7)
#define PIN_OFFSET_FIXED (1<<8)
#define PIN_OFFSET_MASK (~4095)
int __must_check
i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
u64 size,
u64 alignment,
u64 flags);
int __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
@ -3311,11 +3294,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
uint32_t alignment,
unsigned flags)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
flags | PIN_GLOBAL);
return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
0, alignment, flags);
}
void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,

View File

@ -2963,34 +2963,30 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
}
/**
* Finds free space in the GTT aperture and binds the object or a view of it
* there.
* @obj: object to bind
* @vm: address space to bind into
* @ggtt_view: global gtt view if applicable
* i915_vma_insert - finds a slot for the vma in its address space
* @vma: the vma
* @size: requested size in bytes (can be larger than the VMA)
* @alignment: requested alignment
* @alignment: required alignment
* @flags: mask of PIN_* flags to use
*
* First we try to allocate some free space that meets the requirements for
* the VMA. Failiing that, if the flags permit, it will evict an old VMA,
* preferrably the oldest idle entry to make room for the new VMA.
*
* Returns:
* 0 on success, negative error code otherwise.
*/
static struct i915_vma *
i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *ggtt_view,
u64 size,
u64 alignment,
u64 flags)
static int
i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *vma;
struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
struct drm_i915_gem_object *obj = vma->obj;
u64 start, end;
u64 min_alignment;
int ret;
vma = ggtt_view ?
i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
i915_gem_obj_lookup_or_create_vma(obj, vm);
if (IS_ERR(vma))
return vma;
GEM_BUG_ON(vma->bound);
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
size = max(size, vma->size);
if (flags & PIN_MAPPABLE)
@ -3004,7 +3000,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
if (alignment & (min_alignment - 1)) {
DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
alignment, min_alignment);
return ERR_PTR(-EINVAL);
return -EINVAL;
}
start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
@ -3024,17 +3020,17 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
size, obj->base.size,
flags & PIN_MAPPABLE ? "mappable" : "total",
end);
return ERR_PTR(-E2BIG);
return -E2BIG;
}
ret = i915_gem_object_get_pages(obj);
if (ret)
return ERR_PTR(ret);
return ret;
i915_gem_object_pin_pages(obj);
if (flags & PIN_OFFSET_FIXED) {
uint64_t offset = flags & PIN_OFFSET_MASK;
u64 offset = flags & PIN_OFFSET_MASK;
if (offset & (alignment - 1) || offset > end - size) {
ret = -EINVAL;
goto err_unpin;
@ -3096,11 +3092,11 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
obj->bind_count++;
return vma;
return 0;
err_unpin:
i915_gem_object_unpin_pages(obj);
return ERR_PTR(ret);
return ret;
}
bool
@ -3661,6 +3657,9 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
struct drm_i915_gem_object *obj = vma->obj;
if (!drm_mm_node_allocated(&vma->node))
return false;
if (vma->node.size < size)
return true;
@ -3705,91 +3704,42 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
obj->map_and_fenceable = mappable && fenceable;
}
static int
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *ggtt_view,
u64 size,
u64 alignment,
u64 flags)
int
i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *vma;
unsigned bound;
unsigned int bound = vma->bound;
int ret;
if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
return -ENODEV;
GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
return -EINVAL;
if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
return -EBUSY;
if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
return -EINVAL;
/* Pin early to prevent the shrinker/eviction logic from destroying
* our vma as we insert and bind.
*/
__i915_vma_pin(vma);
if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
return -EINVAL;
vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
i915_gem_obj_to_vma(obj, vm);
if (vma) {
if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
return -EBUSY;
if (i915_vma_misplaced(vma, size, alignment, flags)) {
WARN(i915_vma_is_pinned(vma),
"bo is already pinned in %s with incorrect alignment:"
" offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
" obj->map_and_fenceable=%d\n",
ggtt_view ? "ggtt" : "ppgtt",
upper_32_bits(vma->node.start),
lower_32_bits(vma->node.start),
alignment,
!!(flags & PIN_MAPPABLE),
obj->map_and_fenceable);
ret = i915_vma_unbind(vma);
if (ret)
return ret;
vma = NULL;
}
if (!bound) {
ret = i915_vma_insert(vma, size, alignment, flags);
if (ret)
goto err;
}
if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
size, alignment, flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
}
bound = vma->bound;
ret = i915_vma_bind(vma, obj->cache_level, flags);
ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
if (ret)
return ret;
goto err;
if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
(bound ^ vma->bound) & GLOBAL_BIND) {
if ((bound ^ vma->bound) & GLOBAL_BIND)
__i915_vma_set_map_and_fenceable(vma);
WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
}
GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
__i915_vma_pin(vma);
return 0;
}
int
i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
u64 size,
u64 alignment,
u64 flags)
{
return i915_gem_object_do_pin(obj, vm,
i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
size, alignment, flags);
err:
__i915_vma_unpin(vma);
return ret;
}
int
@ -3799,14 +3749,35 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
u64 alignment,
u64 flags)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct i915_vma *vma;
int ret;
BUG_ON(!view);
return i915_gem_object_do_pin(obj, &ggtt->base, view,
size, alignment, flags | PIN_GLOBAL);
vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
if (IS_ERR(vma))
return PTR_ERR(vma);
if (i915_vma_misplaced(vma, size, alignment, flags)) {
if (flags & PIN_NONBLOCK &&
(i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
return -ENOSPC;
WARN(i915_vma_is_pinned(vma),
"bo is already pinned in ggtt with incorrect alignment:"
" offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
" obj->map_and_fenceable=%d\n",
upper_32_bits(vma->node.start),
lower_32_bits(vma->node.start),
alignment,
!!(flags & PIN_MAPPABLE),
obj->map_and_fenceable);
ret = i915_vma_unbind(vma);
if (ret)
return ret;
}
return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
}
void

View File

@ -45,11 +45,10 @@
struct i915_execbuffer_params {
struct drm_device *dev;
struct drm_file *file;
u32 dispatch_flags;
u32 args_batch_start_offset;
u32 batch_obj_vm_offset;
struct i915_vma *batch;
u32 dispatch_flags;
u32 args_batch_start_offset;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *batch_obj;
struct i915_gem_context *ctx;
struct drm_i915_gem_request *request;
};
@ -102,6 +101,26 @@ eb_reset(struct eb_vmas *eb)
memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}
static struct i915_vma *
eb_get_batch(struct eb_vmas *eb)
{
struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
/*
* SNA is doing fancy tricks with compressing batch buffers, which leads
* to negative relocation deltas. Usually that works out ok since the
* relocate address is still positive, except when the batch is placed
* very low in the GTT. Ensure this doesn't happen.
*
* Note that actual hangs have only been observed on gen7, but for
* paranoia do it everywhere.
*/
if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
return vma;
}
static int
eb_lookup_vmas(struct eb_vmas *eb,
struct drm_i915_gem_exec_object2 *exec,
@ -198,35 +217,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
return ret;
}
static inline struct i915_vma *
eb_get_batch_vma(struct eb_vmas *eb)
{
/* The batch is always the LAST item in the VMA list */
struct i915_vma *vma = list_last_entry(&eb->vmas, typeof(*vma), exec_list);
return vma;
}
static struct drm_i915_gem_object *
eb_get_batch(struct eb_vmas *eb)
{
struct i915_vma *vma = eb_get_batch_vma(eb);
/*
* SNA is doing fancy tricks with compressing batch buffers, which leads
* to negative relocation deltas. Usually that works out ok since the
* relocate address is still positive, except when the batch is placed
* very low in the GTT. Ensure this doesn't happen.
*
* Note that actual hangs have only been observed on gen7, but for
* paranoia do it everywhere.
*/
if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
return vma->obj;
}
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
{
if (eb->and < 0) {
@ -682,16 +672,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
flags |= PIN_HIGH;
}
ret = i915_gem_object_pin(obj, vma->vm,
entry->pad_to_size,
entry->alignment,
flags);
if ((ret == -ENOSPC || ret == -E2BIG) &&
ret = i915_vma_pin(vma,
entry->pad_to_size,
entry->alignment,
flags);
if ((ret == -ENOSPC || ret == -E2BIG) &&
only_mappable_for_reloc(entry->flags))
ret = i915_gem_object_pin(obj, vma->vm,
entry->pad_to_size,
entry->alignment,
flags & ~PIN_MAPPABLE);
ret = i915_vma_pin(vma,
entry->pad_to_size,
entry->alignment,
flags & ~PIN_MAPPABLE);
if (ret)
return ret;
@ -1252,11 +1242,11 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
return 0;
}
static struct drm_i915_gem_object*
static struct i915_vma*
i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
struct drm_i915_gem_exec_object2 *shadow_exec_entry,
struct eb_vmas *eb,
struct drm_i915_gem_object *batch_obj,
struct eb_vmas *eb,
u32 batch_start_offset,
u32 batch_len,
bool is_master)
@ -1268,7 +1258,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
PAGE_ALIGN(batch_len));
if (IS_ERR(shadow_batch_obj))
return shadow_batch_obj;
return ERR_CAST(shadow_batch_obj);
ret = intel_engine_cmd_parser(engine,
batch_obj,
@ -1293,14 +1283,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
i915_gem_object_get(shadow_batch_obj);
list_add_tail(&vma->exec_list, &eb->vmas);
shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
return shadow_batch_obj;
return vma;
err:
i915_gem_object_unpin_pages(shadow_batch_obj);
if (ret == -EACCES) /* unhandled chained batch */
return batch_obj;
return NULL;
else
return ERR_PTR(ret);
}
@ -1381,11 +1369,11 @@ execbuf_submit(struct i915_execbuffer_params *params,
}
exec_len = args->batch_len;
exec_start = params->batch_obj_vm_offset +
exec_start = params->batch->node.start +
params->args_batch_start_offset;
if (exec_len == 0)
exec_len = params->batch_obj->base.size;
exec_len = params->batch->size;
ret = params->engine->emit_bb_start(params->request,
exec_start, exec_len,
@ -1489,7 +1477,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct eb_vmas *eb;
struct drm_i915_gem_object *batch_obj;
struct drm_i915_gem_exec_object2 shadow_exec_entry;
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
@ -1583,7 +1570,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
/* take note of the batch buffer before we might reorder the lists */
batch_obj = eb_get_batch(eb);
params->batch = eb_get_batch(eb);
/* Move the objects en-masse into the GTT, evicting if necessary. */
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
@ -1607,7 +1594,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
/* Set the pending read domains for the batch buffer to COMMAND */
if (batch_obj->base.pending_write_domain) {
if (params->batch->obj->base.pending_write_domain) {
DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
ret = -EINVAL;
goto err;
@ -1615,26 +1602,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->args_batch_start_offset = args->batch_start_offset;
if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
struct drm_i915_gem_object *parsed_batch_obj;
struct i915_vma *vma;
parsed_batch_obj = i915_gem_execbuffer_parse(engine,
&shadow_exec_entry,
eb,
batch_obj,
args->batch_start_offset,
args->batch_len,
drm_is_current_master(file));
if (IS_ERR(parsed_batch_obj)) {
ret = PTR_ERR(parsed_batch_obj);
vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
params->batch->obj,
eb,
args->batch_start_offset,
args->batch_len,
drm_is_current_master(file));
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
}
/*
* parsed_batch_obj == batch_obj means batch not fully parsed:
* Accept, but don't promote to secure.
*/
if (parsed_batch_obj != batch_obj) {
if (vma) {
/*
* Batch parsed and accepted:
*
@ -1646,16 +1627,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
*/
dispatch_flags |= I915_DISPATCH_SECURE;
params->args_batch_start_offset = 0;
batch_obj = parsed_batch_obj;
params->batch = vma;
}
}
batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
if (dispatch_flags & I915_DISPATCH_SECURE) {
struct drm_i915_gem_object *obj = params->batch->obj;
/*
* So on first glance it looks freaky that we pin the batch here
* outside of the reservation loop. But:
@ -1666,13 +1649,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* fitting due to fragmentation.
* So this is actually safe.
*/
ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
if (ret)
goto err;
params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
} else
params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
params->batch = i915_gem_obj_to_ggtt(obj);
}
/* Allocate a request for this batch buffer nice and early. */
params->request = i915_gem_request_alloc(engine, ctx);
@ -1695,12 +1677,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->file = file;
params->engine = engine;
params->dispatch_flags = dispatch_flags;
params->batch_obj = batch_obj;
params->ctx = ctx;
ret = execbuf_submit(params, args, &eb->vmas);
err_request:
__i915_add_request(params->request, params->batch_obj, ret == 0);
__i915_add_request(params->request, params->batch->obj, ret == 0);
err_batch_unpin:
/*
@ -1710,8 +1691,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
* active.
*/
if (dispatch_flags & I915_DISPATCH_SECURE)
i915_gem_object_ggtt_unpin(batch_obj);
i915_vma_unpin(params->batch);
err:
/* the request owns the ref now */
i915_gem_context_put(ctx);

View File

@ -3665,13 +3665,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
return 0;
if (vma->bound == 0 && vma->vm->allocate_va_range) {
/* XXX: i915_vma_pin() will fix this +- hack */
__i915_vma_pin(vma);
trace_i915_va_alloc(vma);
ret = vma->vm->allocate_va_range(vma->vm,
vma->node.start,
vma->node.size);
__i915_vma_unpin(vma);
if (ret)
return ret;
}

View File

@ -609,6 +609,20 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
return true;
}
int __must_check
i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
/* Flags used by pin/bind&friends. */
#define PIN_MAPPABLE BIT(0)
#define PIN_NONBLOCK BIT(1)
#define PIN_GLOBAL BIT(2)
#define PIN_OFFSET_BIAS BIT(3)
#define PIN_USER BIT(4)
#define PIN_UPDATE BIT(5)
#define PIN_ZONE_4G BIT(6)
#define PIN_HIGH BIT(7)
#define PIN_OFFSET_FIXED BIT(8)
#define PIN_OFFSET_MASK (~4095)
static inline int i915_vma_pin_count(const struct i915_vma *vma)
{
return vma->pin_count;