drm/i915: Use multiple VMs -- the point of no return

As with processes which run on the CPU, the goal of multiple VMs is to
provide process isolation. Specific to GEN, there is also the ability to
map more objects per process (2GB each instead of 2Gb-2k total).

For the most part, all the pipes have been laid, and all we need to do
is remove asserts and actually start changing address spaces with the
context switch. Since prior to this we've converted the setting of the
page tables to a streamed version, this is quite easy.

One important thing to point out (since it'd been hotly contested) is
that with this patch, every context created will have it's own address
space (provided the HW can do it).

v2: Disable BDW on rebase

NOTE: I tried to make this commit as small as possible. I needed one
place where I could "turn everything on" and that is here. It could be
split into finer commits, but I didn't really see much point.

Cc: Eric Anholt <eric@anholt.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
Ben Widawsky 2013-12-06 14:11:26 -08:00 committed by Daniel Vetter
parent 3d7f0f9dcc
commit 7e0d96bc03
9 changed files with 86 additions and 58 deletions

View File

@ -1011,6 +1011,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_HANDLE_LUT:
value = 1;
break;
case I915_PARAM_HAS_FULL_PPGTT:
value = USES_FULL_PPGTT(dev);
break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;

View File

@ -116,7 +116,8 @@ MODULE_PARM_DESC(enable_hangcheck,
int i915_enable_ppgtt __read_mostly = -1;
module_param_named(i915_enable_ppgtt, i915_enable_ppgtt, int, 0400);
MODULE_PARM_DESC(i915_enable_ppgtt,
"Enable PPGTT (default: true)");
"Override PPGTT usage. "
"(-1=auto [default], 0=disabled, 1=aliasing, 2=full)");
int i915_enable_psr __read_mostly = 0;
module_param_named(enable_psr, i915_enable_psr, int, 0600);

View File

@ -1831,7 +1831,9 @@ struct drm_i915_file_private {
#define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6)
#define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev))
#define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_BROADWELL(dev))
#define USES_ALIASING_PPGTT(dev) intel_enable_ppgtt(dev, false)
#define USES_FULL_PPGTT(dev) intel_enable_ppgtt(dev, true)
#define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay)
#define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical)
@ -2012,6 +2014,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_object_ops *ops);
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
size_t size);
void i915_init_vm(struct drm_i915_private *dev_priv,
struct i915_address_space *vm);
void i915_gem_free_object(struct drm_gem_object *obj);
void i915_gem_vma_destroy(struct i915_vma *vma);
@ -2290,7 +2294,8 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full)
if (i915_enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
return false;
BUG_ON(full);
if (i915_enable_ppgtt == 1 && full)
return false;
#ifdef CONFIG_INTEL_IOMMU
/* Disable ppgtt on SNB if VT-d is on. */
@ -2300,7 +2305,10 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full)
}
#endif
return HAS_ALIASING_PPGTT(dev);
if (full)
return HAS_PPGTT(dev);
else
return HAS_ALIASING_PPGTT(dev);
}
static inline void ppgtt_release(struct kref *kref)

View File

@ -2247,7 +2247,10 @@ request_to_vm(struct drm_i915_gem_request *request)
struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
struct i915_address_space *vm;
vm = &dev_priv->gtt.base;
if (request->ctx)
vm = request->ctx->vm;
else
vm = &dev_priv->gtt.base;
return vm;
}
@ -2718,9 +2721,6 @@ int i915_vma_unbind(struct i915_vma *vma)
drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
int ret;
/* For now we only ever use 1 vma per object */
WARN_ON(!list_is_singular(&obj->vma_list));
if (list_empty(&vma->vma_link))
return 0;
@ -3268,17 +3268,12 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
i915_gem_object_pin_pages(obj);
BUG_ON(!i915_is_ggtt(vm));
vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err_unpin;
}
/* For now we only ever use 1 vma per object */
WARN_ON(!list_is_singular(&obj->vma_list));
search_free:
/* FIXME: Some tests are failing when they receive a reloc of 0. To
* prevent this, we simply don't allow the 0th offset. */
@ -4182,9 +4177,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
if (obj->phys_obj)
i915_gem_detach_phys_object(dev, obj);
/* NB: 0 or 1 elements */
WARN_ON(!list_empty(&obj->vma_list) &&
!list_is_singular(&obj->vma_list));
list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
int ret;
@ -4580,9 +4572,11 @@ init_ring_lists(struct intel_ring_buffer *ring)
INIT_LIST_HEAD(&ring->request_list);
}
static void i915_init_vm(struct drm_i915_private *dev_priv,
struct i915_address_space *vm)
void i915_init_vm(struct drm_i915_private *dev_priv,
struct i915_address_space *vm)
{
if (!i915_is_ggtt(vm))
drm_mm_init(&vm->mm, vm->start, vm->total);
vm->dev = dev_priv->dev;
INIT_LIST_HEAD(&vm->active_list);
INIT_LIST_HEAD(&vm->inactive_list);

View File

@ -288,17 +288,15 @@ i915_gem_create_context(struct drm_device *dev,
DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
goto err_destroy;
}
ctx->vm = &dev_priv->mm.aliasing_ppgtt->base;
}
} else if (USES_ALIASING_PPGTT(dev)) {
/* For platforms which only have aliasing PPGTT, we fake the
* address space and refcounting. */
kref_get(&dev_priv->mm.aliasing_ppgtt->ref);
}
/* TODO: Until full ppgtt... */
if (USES_ALIASING_PPGTT(dev))
ctx->vm = &dev_priv->mm.aliasing_ppgtt->base;
else
kref_get(&dev_priv->mm.aliasing_ppgtt->ref);
} else
ctx->vm = &dev_priv->gtt.base;
return ctx;
@ -500,7 +498,7 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
mutex_lock(&dev->struct_mutex);
file_priv->private_default_ctx =
i915_gem_create_context(dev, file_priv, false);
i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
mutex_unlock(&dev->struct_mutex);
if (IS_ERR(file_priv->private_default_ctx)) {
@ -587,6 +585,7 @@ static int do_switch(struct intel_ring_buffer *ring,
{
struct drm_i915_private *dev_priv = ring->dev->dev_private;
struct i915_hw_context *from = ring->last_context;
struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to);
u32 hw_flags = 0;
int ret, i;
@ -598,17 +597,15 @@ static int do_switch(struct intel_ring_buffer *ring,
if (from == to && from->last_ring == ring && !to->remap_slice)
return 0;
if (ring != &dev_priv->ring[RCS]) {
if (from)
i915_gem_context_unreference(from);
goto done;
/* Trying to pin first makes error handling easier. */
if (ring == &dev_priv->ring[RCS]) {
ret = i915_gem_obj_ggtt_pin(to->obj,
get_context_alignment(ring->dev),
false, false);
if (ret)
return ret;
}
ret = i915_gem_obj_ggtt_pin(to->obj, get_context_alignment(ring->dev),
false, false);
if (ret)
return ret;
/*
* Pin can switch back to the default context if we end up calling into
* evict_everything - as a last ditch gtt defrag effort that also
@ -616,6 +613,18 @@ static int do_switch(struct intel_ring_buffer *ring,
*/
from = ring->last_context;
if (USES_FULL_PPGTT(ring->dev)) {
ret = ppgtt->switch_mm(ppgtt, ring, false);
if (ret)
goto unpin_out;
}
if (ring != &dev_priv->ring[RCS]) {
if (from)
i915_gem_context_unreference(from);
goto done;
}
/*
* Clear this page out of any CPU caches for coherent swap-in/out. Note
* that thanks to write = false in this call and us not setting any gpu
@ -625,10 +634,8 @@ static int do_switch(struct intel_ring_buffer *ring,
* XXX: We need a real interface to do this instead of trickery.
*/
ret = i915_gem_object_set_to_gtt_domain(to->obj, false);
if (ret) {
i915_gem_object_ggtt_unpin(to->obj);
return ret;
}
if (ret)
goto unpin_out;
if (!to->obj->has_global_gtt_mapping) {
struct i915_vma *vma = i915_gem_obj_to_vma(to->obj,
@ -640,10 +647,8 @@ static int do_switch(struct intel_ring_buffer *ring,
hw_flags |= MI_RESTORE_INHIBIT;
ret = mi_set_context(ring, to, hw_flags);
if (ret) {
i915_gem_object_ggtt_unpin(to->obj);
return ret;
}
if (ret)
goto unpin_out;
for (i = 0; i < MAX_L3_SLICES; i++) {
if (!(to->remap_slice & (1<<i)))
@ -688,6 +693,11 @@ static int do_switch(struct intel_ring_buffer *ring,
to->last_ring = ring;
return 0;
unpin_out:
if (ring->id == RCS)
i915_gem_object_ggtt_unpin(to->obj);
return ret;
}
/**
@ -736,7 +746,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
if (ret)
return ret;
ctx = i915_gem_create_context(dev, file_priv, false);
ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
mutex_unlock(&dev->struct_mutex);
if (IS_ERR(ctx))
return PTR_ERR(ctx);

View File

@ -991,7 +991,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct i915_hw_context *ctx;
struct i915_address_space *vm;
const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 exec_start, exec_len;
u32 exec_start = args->batch_start_offset, exec_len;
u32 mask, flags;
int ret, mode, i;
bool need_relocs;
@ -1112,9 +1112,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
i915_gem_context_reference(ctx);
/* HACK until we have full PPGTT */
/* vm = ctx->vm; */
vm = &dev_priv->gtt.base;
vm = ctx->vm;
if (!USES_FULL_PPGTT(dev))
vm = &dev_priv->gtt.base;
eb = eb_create(args);
if (eb == NULL) {
@ -1170,6 +1170,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
}
if (flags & I915_DISPATCH_SECURE)
exec_start += i915_gem_obj_ggtt_offset(batch_obj);
else
exec_start += i915_gem_obj_offset(batch_obj, vm);
ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
if (ret)
goto err;
@ -1199,8 +1204,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}
exec_start = i915_gem_obj_offset(batch_obj, vm) +
args->batch_start_offset;
exec_len = args->batch_len;
if (cliprects) {
for (i = 0; i < args->num_cliprects; i++) {

View File

@ -324,6 +324,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
container_of(vm, struct i915_hw_ppgtt, base);
int i, j;
list_del(&vm->global_link);
drm_mm_takedown(&vm->mm);
for (i = 0; i < ppgtt->num_pd_pages ; i++) {
@ -755,6 +756,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
container_of(vm, struct i915_hw_ppgtt, base);
int i;
list_del(&vm->global_link);
drm_mm_takedown(&ppgtt->base.mm);
drm_mm_remove_node(&ppgtt->node);
@ -901,17 +903,22 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
BUG();
if (!ret) {
struct drm_i915_private *dev_priv = dev->dev_private;
kref_init(&ppgtt->ref);
drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
ppgtt->base.total);
if (INTEL_INFO(dev)->gen < 8)
i915_init_vm(dev_priv, &ppgtt->base);
if (INTEL_INFO(dev)->gen < 8) {
gen6_write_pdes(ppgtt);
DRM_DEBUG("Adding PPGTT at offset %x\n",
ppgtt->pd_offset << 10);
}
}
return ret;
}
static void __always_unused
static void
ppgtt_bind_vma(struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
@ -923,7 +930,7 @@ ppgtt_bind_vma(struct i915_vma *vma,
vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
}
static void __always_unused ppgtt_unbind_vma(struct i915_vma *vma)
static void ppgtt_unbind_vma(struct i915_vma *vma)
{
const unsigned long entry = vma->node.start >> PAGE_SHIFT;
@ -1719,8 +1726,13 @@ static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
case 8:
case 7:
case 6:
vma->unbind_vma = ggtt_unbind_vma;
vma->bind_vma = ggtt_bind_vma;
if (i915_is_ggtt(vm)) {
vma->unbind_vma = ggtt_unbind_vma;
vma->bind_vma = ggtt_bind_vma;
} else {
vma->unbind_vma = ppgtt_unbind_vma;
vma->bind_vma = ppgtt_bind_vma;
}
break;
case 5:
case 4:

View File

@ -909,11 +909,6 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
list_for_each_entry(vm, &dev_priv->vm_list, global_link)
cnt++;
if (WARN(cnt > 1, "Multiple VMs not yet supported\n"))
cnt = 1;
vm = &dev_priv->gtt.base;
error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),

View File

@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_EXEC_NO_RELOC 25
#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26
#define I915_PARAM_HAS_WT 27
#define I915_PARAM_HAS_FULL_PPGTT 28
typedef struct drm_i915_getparam {
int param;