/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "gem/i915_gem_pm.h"
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_gt.h"

#include "i915_selftest.h"

#include "igt_flush_test.h"
#include "lib_sw_fence.h"
#include "mock_drm.h"
#include "mock_gem_device.h"

static void quirk_add(struct drm_i915_gem_object *obj,
                      struct list_head *objects)
{
        /* quirk is only for live tiled objects, use it to declare ownership */
        GEM_BUG_ON(obj->mm.quirked);
        obj->mm.quirked = true;
        list_add(&obj->st_link, objects);
}

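/*
 * Fill the GGTT with as many single-page internal objects as will fit,
 * pinning each one, then sanity check that they all ended up bound. The
 * caller owns the objects (marked via the quirk flag) and must release
 * them with unpin_ggtt() and cleanup_objects().
 */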
static int populate_ggtt(struct drm_i915_private *i915,
                         struct list_head *objects)
{
        unsigned long unbound, bound, count;
        struct drm_i915_gem_object *obj;

        count = 0;
        do {
                struct i915_vma *vma;

                obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE);
                if (IS_ERR(obj))
                        return PTR_ERR(obj);

                vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
                if (IS_ERR(vma)) {
                        i915_gem_object_put(obj);
                        if (vma == ERR_PTR(-ENOSPC))
                                break;

                        return PTR_ERR(vma);
                }

                quirk_add(obj, objects);
                count++;
        } while (1);
        pr_debug("Filled GGTT with %lu pages [%llu total]\n",
                 count, i915->ggtt.vm.total / PAGE_SIZE);

        bound = 0;
        unbound = 0;
        list_for_each_entry(obj, objects, st_link) {
                GEM_BUG_ON(!obj->mm.quirked);

                if (atomic_read(&obj->bind_count))
                        bound++;
                else
                        unbound++;
        }
        GEM_BUG_ON(bound + unbound != count);

        if (unbound) {
                pr_err("%s: Found %lu objects unbound, expected %u!\n",
                       __func__, unbound, 0);
                return -EINVAL;
        }

        if (bound != count) {
                pr_err("%s: Found %lu objects bound, expected %lu!\n",
                       __func__, bound, count);
                return -EINVAL;
        }

        if (list_empty(&i915->ggtt.vm.bound_list)) {
                pr_err("No objects on the GGTT inactive list!\n");
                return -EINVAL;
        }

        return 0;
}

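/* Drop the pin taken by populate_ggtt() on every object we own. */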
static void unpin_ggtt(struct drm_i915_private *i915)
{
        struct i915_vma *vma;

        list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link)
                if (vma->obj->mm.quirked)
                        i915_vma_unpin(vma);
}

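/*
 * Release every object handed to us via quirk_add() and wait for the freed
 * objects to be fully reaped before returning.
 */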
static void cleanup_objects(struct drm_i915_private *i915,
                            struct list_head *list)
{
        struct drm_i915_gem_object *obj, *on;

        list_for_each_entry_safe(obj, on, list, st_link) {
                GEM_BUG_ON(!obj->mm.quirked);
                obj->mm.quirked = false;
                i915_gem_object_put(obj);
        }

        i915_gem_drain_freed_objects(i915);
}

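/*
 * Expect i915_gem_evict_something() to report -ENOSPC while every object in
 * the GGTT is pinned, and to succeed once they have been unpinned.
 */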
static int igt_evict_something(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_ggtt *ggtt = &i915->ggtt;
        LIST_HEAD(objects);
        int err;

        /* Fill the GGTT with pinned objects and try to evict one. */

        err = populate_ggtt(i915, &objects);
        if (err)
                goto cleanup;

        /* Everything is pinned, nothing should happen */
        mutex_lock(&ggtt->vm.mutex);
        err = i915_gem_evict_something(&ggtt->vm,
                                       I915_GTT_PAGE_SIZE, 0, 0,
                                       0, U64_MAX,
                                       0);
        mutex_unlock(&ggtt->vm.mutex);
        if (err != -ENOSPC) {
                pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
                       err);
                goto cleanup;
        }

        unpin_ggtt(i915);

        /* Everything is unpinned, we should be able to evict something */
        mutex_lock(&ggtt->vm.mutex);
        err = i915_gem_evict_something(&ggtt->vm,
                                       I915_GTT_PAGE_SIZE, 0, 0,
                                       0, U64_MAX,
                                       0);
        mutex_unlock(&ggtt->vm.mutex);
        if (err) {
                pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n",
                       err);
                goto cleanup;
        }

cleanup:
        cleanup_objects(i915, &objects);
        return err;
}

static int igt_overcommit(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        LIST_HEAD(objects);
        int err;

        /* Fill the GGTT with pinned objects and then try to pin one more.
         * We expect it to fail.
         */

        err = populate_ggtt(i915, &objects);
        if (err)
                goto cleanup;

        obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
                goto cleanup;
        }

        quirk_add(obj, &objects);

        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
        if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) {
                pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma));
                err = -EINVAL;
                goto cleanup;
        }

cleanup:
        cleanup_objects(i915, &objects);
        return err;
}

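/*
 * i915_gem_evict_for_node() on a fully pinned GGTT must fail with -ENOSPC;
 * once the objects are unpinned, evicting the target range must succeed.
 */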
static int igt_evict_for_vma(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_ggtt *ggtt = &i915->ggtt;
        struct drm_mm_node target = {
                .start = 0,
                .size = 4096,
        };
        LIST_HEAD(objects);
        int err;

        /* Fill the GGTT with pinned objects and try to evict a range. */

        err = populate_ggtt(i915, &objects);
        if (err)
                goto cleanup;

        /* Everything is pinned, nothing should happen */
        mutex_lock(&ggtt->vm.mutex);
        err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
        mutex_unlock(&ggtt->vm.mutex);
        if (err != -ENOSPC) {
                pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n",
                       err);
                goto cleanup;
        }

        unpin_ggtt(i915);

        /* Everything is unpinned, we should be able to evict the node */
        mutex_lock(&ggtt->vm.mutex);
        err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
        mutex_unlock(&ggtt->vm.mutex);
        if (err) {
                pr_err("i915_gem_evict_for_node returned err=%d\n",
                       err);
                goto cleanup;
        }

cleanup:
        cleanup_objects(i915, &objects);
        return err;
}

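/*
 * No-op colour adjustment: merely installing a callback is enough to make
 * the GGTT report cache colouring for this test.
 */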
static void mock_color_adjust(const struct drm_mm_node *node,
                              unsigned long color,
                              u64 *start,
                              u64 *end)
{
}

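/*
 * Exercise eviction with cache colouring enabled: evicting the unpinned,
 * same-coloured neighbour should succeed, while a differently-coloured
 * target that would require evicting its pinned neighbour must fail.
 */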
static int igt_evict_for_cache_color(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_ggtt *ggtt = &i915->ggtt;
        const unsigned long flags = PIN_OFFSET_FIXED;
        struct drm_mm_node target = {
                .start = I915_GTT_PAGE_SIZE * 2,
                .size = I915_GTT_PAGE_SIZE,
                .color = I915_CACHE_LLC,
        };
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        LIST_HEAD(objects);
        int err;

        /*
         * Currently the use of color_adjust for the GGTT is limited to cache
         * coloring and guard pages, and so the presence of mm.color_adjust for
         * the GGTT is assumed to be i915_ggtt_color_adjust, hence using a mock
         * color adjust will work just fine for our purposes.
         */
        ggtt->vm.mm.color_adjust = mock_color_adjust;
        GEM_BUG_ON(!i915_vm_has_cache_coloring(&ggtt->vm));

        obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
                goto cleanup;
        }
        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
        quirk_add(obj, &objects);

        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
                                       I915_GTT_PAGE_SIZE | flags);
        if (IS_ERR(vma)) {
                pr_err("[0]i915_gem_object_ggtt_pin failed\n");
                err = PTR_ERR(vma);
                goto cleanup;
        }

        obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE);
        if (IS_ERR(obj)) {
                err = PTR_ERR(obj);
                goto cleanup;
        }
        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
        quirk_add(obj, &objects);

        /* Neighbouring; same colour - should fit */
        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
                                       (I915_GTT_PAGE_SIZE * 2) | flags);
        if (IS_ERR(vma)) {
                pr_err("[1]i915_gem_object_ggtt_pin failed\n");
                err = PTR_ERR(vma);
                goto cleanup;
        }

        i915_vma_unpin(vma);

        /* Remove just the second vma */
        mutex_lock(&ggtt->vm.mutex);
        err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
        mutex_unlock(&ggtt->vm.mutex);
        if (err) {
                pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err);
                goto cleanup;
        }

        /* Attempt to remove the first *pinned* vma, by removing the (empty)
         * neighbour -- this should fail.
         */
        target.color = I915_CACHE_L3_LLC;

        mutex_lock(&ggtt->vm.mutex);
        err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
        mutex_unlock(&ggtt->vm.mutex);
        if (!err) {
                pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err);
                err = -EINVAL;
                goto cleanup;
        }

        err = 0;

cleanup:
        unpin_ggtt(i915);
        cleanup_objects(i915, &objects);
        ggtt->vm.mm.color_adjust = NULL;
        return err;
}

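/*
 * i915_gem_evict_vm() should report success even while every object is
 * pinned (there is simply nothing it can unbind), and again once the
 * objects have been unpinned and can actually be evicted.
 */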
static int igt_evict_vm(void *arg)
{
        struct drm_i915_private *i915 = arg;
        struct i915_ggtt *ggtt = &i915->ggtt;
        LIST_HEAD(objects);
        int err;

        /* Fill the GGTT with pinned objects and try to evict everything. */

        err = populate_ggtt(i915, &objects);
        if (err)
                goto cleanup;

        /* Everything is pinned, nothing should happen */
        mutex_lock(&ggtt->vm.mutex);
        err = i915_gem_evict_vm(&ggtt->vm);
        mutex_unlock(&ggtt->vm.mutex);
        if (err) {
                pr_err("i915_gem_evict_vm on a full GGTT returned err=%d\n",
                       err);
                goto cleanup;
        }

        unpin_ggtt(i915);

        mutex_lock(&ggtt->vm.mutex);
        err = i915_gem_evict_vm(&ggtt->vm);
        mutex_unlock(&ggtt->vm.mutex);
        if (err) {
                pr_err("i915_gem_evict_vm on a full GGTT returned err=%d\n",
                       err);
                goto cleanup;
        }

cleanup:
        cleanup_objects(i915, &objects);
        return err;
}

static int igt_evict_contexts(void *arg)
|
|
|
|
{
|
|
|
|
const u64 PRETEND_GGTT_SIZE = 16ull << 20;
|
|
|
|
struct drm_i915_private *i915 = arg;
|
|
|
|
struct intel_engine_cs *engine;
|
|
|
|
enum intel_engine_id id;
|
|
|
|
struct reserved {
|
|
|
|
struct drm_mm_node node;
|
|
|
|
struct reserved *next;
|
|
|
|
} *reserved = NULL;
|
2019-01-14 22:21:22 +08:00
|
|
|
intel_wakeref_t wakeref;
|
2017-10-12 20:57:26 +08:00
|
|
|
struct drm_mm_node hole;
|
|
|
|
unsigned long count;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The purpose of this test is to verify that we will trigger an
|
|
|
|
* eviction in the GGTT when constructing a request that requires
|
|
|
|
* additional space in the GGTT for pinning the context. This space
|
|
|
|
* is not directly tied to the request so reclaiming it requires
|
|
|
|
* extra work.
|
|
|
|
*
|
|
|
|
* As such this test is only meaningful for full-ppgtt environments
|
|
|
|
* where the GTT space of the request is separate from the GGTT
|
|
|
|
* allocation required to build the request.
|
|
|
|
*/
|
2018-09-27 04:12:22 +08:00
|
|
|
if (!HAS_FULL_PPGTT(i915))
|
2017-10-12 20:57:26 +08:00
|
|
|
return 0;
|
|
|
|
|
2019-06-14 07:21:54 +08:00
|
|
|
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
|
2017-10-12 20:57:26 +08:00
|
|
|
|
|
|
|
/* Reserve a block so that we know we have enough to fit a few rq */
|
|
|
|
memset(&hole, 0, sizeof(hole));
	mutex_lock(&i915->ggtt.vm.mutex);
	err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole,
				  PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE,
				  0, i915->ggtt.vm.total,
				  PIN_NOEVICT);
	if (err)
		goto out_locked;

	/* Make the GGTT appear small by filling it with unevictable nodes */
	count = 0;
	do {
		struct reserved *r;

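		/*
		 * We cannot allocate while holding the vm->mutex (it is
		 * tainted by the shrinker), so drop the lock around the
		 * allocation and reacquire it before touching the drm_mm.
		 */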
		mutex_unlock(&i915->ggtt.vm.mutex);
		r = kcalloc(1, sizeof(*r), GFP_KERNEL);
		mutex_lock(&i915->ggtt.vm.mutex);
		if (!r) {
			err = -ENOMEM;
			goto out_locked;
		}

		if (i915_gem_gtt_insert(&i915->ggtt.vm, &r->node,
					1ul << 20, 0, I915_COLOR_UNEVICTABLE,
					0, i915->ggtt.vm.total,
					PIN_NOEVICT)) {
			kfree(r);
			break;
		}

		r->next = reserved;
		reserved = r;

		count++;
	} while (1);
	drm_mm_remove_node(&hole);
	mutex_unlock(&i915->ggtt.vm.mutex);
	pr_info("Filled GGTT with %lu 1MiB nodes\n", count);

	/* Overfill the GGTT with context objects and so try to evict one. */
	for_each_engine(engine, i915, id) {
		struct i915_sw_fence fence;
		struct drm_file *file;

		file = mock_file(i915);
		if (IS_ERR(file)) {
			err = PTR_ERR(file);
			break;
		}

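		/*
		 * Hold every request back with an on-stack fence so that
		 * each context stays busy (and hence pinned in the GGTT)
		 * until we run out of space and must evict.
		 */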
		count = 0;
		onstack_fence_init(&fence);
		do {
			struct i915_request *rq;
			struct i915_gem_context *ctx;

			ctx = live_context(i915, file);
			if (IS_ERR(ctx))
				break;

			/* We will need some GGTT space for the rq's context */
			igt_evict_ctl.fail_if_busy = true;
			rq = igt_request_alloc(ctx, engine);
			igt_evict_ctl.fail_if_busy = false;

			if (IS_ERR(rq)) {
				/* When full, fail_if_busy will trigger EBUSY */
				if (PTR_ERR(rq) != -EBUSY) {
					pr_err("Unexpected error from request alloc (on %s): %d\n",
					       engine->name,
					       (int)PTR_ERR(rq));
					err = PTR_ERR(rq);
				}
				break;
			}

			/* Keep every request/ctx pinned until we are full */
			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       &fence,
							       GFP_KERNEL);
			if (err < 0)
				break;

			i915_request_add(rq);
			count++;
			err = 0;
		} while (1);
		onstack_fence_fini(&fence);
		pr_info("Submitted %lu contexts/requests on %s\n",
			count, engine->name);

		mock_file_free(i915, file);
		if (err)
			break;
	}

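	/*
	 * Teardown: the reserved nodes were inserted into the GGTT drm_mm,
	 * so retake the vm->mutex before removing them.
	 */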
	mutex_lock(&i915->ggtt.vm.mutex);
out_locked:
	if (igt_flush_test(i915))
		err = -EIO;
	while (reserved) {
		struct reserved *next = reserved->next;

		drm_mm_remove_node(&reserved->node);
		kfree(reserved);

		reserved = next;
	}
	if (drm_mm_node_allocated(&hole))
		drm_mm_remove_node(&hole);
	mutex_unlock(&i915->ggtt.vm.mutex);
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return err;
}

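/*
 * Mock selftests: these run against a mock GEM device, so they exercise
 * the software eviction paths without requiring real hardware.
 */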
int i915_gem_evict_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_evict_something),
		SUBTEST(igt_evict_for_vma),
		SUBTEST(igt_evict_for_cache_color),
		SUBTEST(igt_evict_vm),
		SUBTEST(igt_overcommit),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);
	return err;
}

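/*
 * Live selftests: these need a functional GPU for request submission,
 * so bail out early if the GT is already wedged.
 */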
int i915_gem_evict_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_evict_contexts),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}