drm/amdgpu: add user fence context map v2
This is a prerequisite for the GPU scheduler to make the order of submission independent from the order of execution.

v2: properly implement the locking

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
parent 91e1a5207e
commit 21c16bf634
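Note: the "user fence context map" added below gives every context a small per-ring window of recently submitted fences, keyed by a per-ring 64-bit sequence number; that sequence number, not the global ring fence value, is what user space now gets back as its command-submission handle. A minimal sketch of the data model, with simplified stand-in names (the real structures, locking and error handling are in the hunks that follow):

	/* simplified stand-in for the amdgpu_ctx_ring introduced below */
	struct ctx_fence_window {
		uint64_t	next_seq;	/* sequence handed to the next CS */
		struct fence	*recent[AMDGPU_CTX_MAX_CS_PENDING];	/* last 16 fences */
	};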
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -415,6 +415,8 @@ struct amdgpu_user_fence {
 	struct amdgpu_bo	*bo;
 	/* write-back address offset to bo start */
 	uint32_t		offset;
+	/* resulting sequence number */
+	uint64_t		sequence;
 };
 
 int amdgpu_fence_driver_init(struct amdgpu_device *adev);
@@ -985,9 +987,18 @@ struct amdgpu_vm_manager {
  * context related structures
  */
 
+#define AMDGPU_CTX_MAX_CS_PENDING	16
+
+struct amdgpu_ctx_ring {
+	uint64_t	sequence;
+	struct fence	*fences[AMDGPU_CTX_MAX_CS_PENDING];
+};
+
 struct amdgpu_ctx {
 	struct kref		refcount;
 	unsigned		reset_counter;
+	spinlock_t		ring_lock;
+	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 };
 
 struct amdgpu_ctx_mgr {
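Note: a sequence number stays resolvable only while it is among the last AMDGPU_CTX_MAX_CS_PENDING (16) submissions on its ring; older slots are overwritten by amdgpu_ctx_add_fence() (see amdgpu_ctx.c below). A hypothetical helper, not part of the patch, that expresses the window check amdgpu_ctx_get_fence() performs:

	static bool amdgpu_ctx_seq_in_window(struct amdgpu_ctx_ring *cring, uint64_t seq)
	{
		/* valid: already handed out, and not yet overwritten in the ring buffer */
		return seq < cring->sequence &&
		       seq + AMDGPU_CTX_MAX_CS_PENDING >= cring->sequence;
	}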
@@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
+			      struct fence *fence);
+struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+				   struct amdgpu_ring *ring, uint64_t seq);
+
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 		for (j = 0; j < num_deps; ++j) {
-			struct amdgpu_fence *fence;
 			struct amdgpu_ring *ring;
 			struct amdgpu_ctx *ctx;
+			struct fence *fence;
 
 			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
 					       deps[j].ip_instance,
@@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			if (ctx == NULL)
 				return -EINVAL;
 
-			r = amdgpu_fence_recreate(ring, p->filp,
-						  deps[j].handle,
-						  &fence);
-			if (r) {
+			fence = amdgpu_ctx_get_fence(ctx, ring,
+						     deps[j].handle);
+			if (IS_ERR(fence)) {
+				r = PTR_ERR(fence);
 				amdgpu_ctx_put(ctx);
 				return r;
-			}
 
-			r = amdgpu_sync_fence(adev, &ib->sync, &fence->base);
-			amdgpu_fence_unref(&fence);
-			amdgpu_ctx_put(ctx);
-
-			if (r)
-				return r;
+			} else if (fence) {
+				r = amdgpu_sync_fence(adev, &ib->sync, fence);
+				fence_put(fence);
+				amdgpu_ctx_put(ctx);
+				if (r)
+					return r;
+			}
 		}
 	}
 
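Note: the dependency path now distinguishes three outcomes from amdgpu_ctx_get_fence(): an ERR_PTR for a handle that was never issued on that ring, NULL for a handle that has aged out of the 16-entry window (and is therefore guaranteed signaled, because amdgpu_ctx_add_fence() waits for a slot's previous fence before reusing it), or a live fence that gets added to the IB's sync object. A condensed sketch of that contract, not a function in the patch:

	static int amdgpu_cs_sync_ctx_fence(struct amdgpu_device *adev,
					    struct amdgpu_sync *sync,
					    struct amdgpu_ctx *ctx,
					    struct amdgpu_ring *ring, uint64_t handle)
	{
		struct fence *fence = amdgpu_ctx_get_fence(ctx, ring, handle);
		int r = 0;

		if (IS_ERR(fence))		/* never issued on this ring */
			return PTR_ERR(fence);
		if (fence) {			/* still in the window: really sync on it */
			r = amdgpu_sync_fence(adev, sync, fence);
			fence_put(fence);
		}
		/* NULL: aged out of the window, already signaled, nothing to do */
		return r;
	}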
@@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		r = amdgpu_cs_ib_fill(adev, &parser);
 	}
 
-	if (!r)
+	if (!r) {
 		r = amdgpu_cs_dependencies(adev, &parser);
+		if (r)
+			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+	}
 
 	if (r) {
 		amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
@@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		goto out;
 	}
 
-	cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq;
+	cs->out.handle = parser.uf.sequence;
 out:
 	amdgpu_cs_parser_fini(&parser, r, true);
 	up_read(&adev->exclusive_lock);
@@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	union drm_amdgpu_wait_cs *wait = data;
 	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_fence *fence = NULL;
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
+	struct fence *fence;
 	long r;
 
+	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
+			       wait->in.ring, &ring);
+	if (r)
+		return r;
+
 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
 	if (ctx == NULL)
 		return -EINVAL;
 
-	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
-			       wait->in.ring, &ring);
-	if (r) {
-		amdgpu_ctx_put(ctx);
-		return r;
-	}
+	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	if (IS_ERR(fence))
+		r = PTR_ERR(fence);
 
-	r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
-	if (r) {
-		amdgpu_ctx_put(ctx);
-		return r;
-	}
-
-	r = fence_wait_timeout(&fence->base, true, timeout);
-	amdgpu_fence_unref(&fence);
+	else if (fence) {
+		r = fence_wait_timeout(fence, true, timeout);
+		fence_put(fence);
+
+	} else
+		r = 1;
+
 	amdgpu_ctx_put(ctx);
 	if (r < 0)
 		return r;
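Note: the wait ioctl resolves its handle through the same map, so wait->in.handle is the per-context sequence number the CS ioctl returned (cs->out.handle above); r = 1 for the NULL case mirrors a successful fence_wait_timeout(), so a fence that already aged out of the window is reported as signaled. A hedged user-space sketch of how the handle round-trips, with field and ioctl names from the amdgpu UAPI as I recall them, not verified against this tree:

	#include <errno.h>
	#include <stdint.h>
	#include <xf86drm.h>
	#include <amdgpu_drm.h>

	static int wait_for_cs(int fd, uint32_t ctx_id, uint64_t cs_handle, uint64_t timeout_ns)
	{
		union drm_amdgpu_wait_cs wait = { 0 };

		wait.in.handle = cs_handle;	/* per-context sequence number from the CS ioctl */
		wait.in.ctx_id = ctx_id;
		wait.in.ip_type = AMDGPU_HW_IP_GFX;
		wait.in.timeout = timeout_ns;

		if (drmIoctl(fd, DRM_IOCTL_AMDGPU_WAIT_CS, &wait))
			return -errno;
		return wait.out.status ? -ETIME : 0;	/* non-zero status: not signaled yet */
	}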
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -28,17 +28,22 @@
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
+	unsigned i, j;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
+
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
+			fence_put(ctx->rings[i].fences[j]);
 	kfree(ctx);
 }
 
 int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
 		     uint32_t *id)
 {
-	int r;
 	struct amdgpu_ctx *ctx;
 	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
+	int i, r;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
 
 	memset(ctx, 0, sizeof(*ctx));
 	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		ctx->rings[i].sequence = 1;
 	mutex_unlock(&mgr->lock);
 
 	return 0;
@@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
 	return 0;
 }
+
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
+			      struct fence *fence)
+{
+	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+	uint64_t seq = cring->sequence;
+	unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
+	struct fence *other = cring->fences[idx];
+
+	if (other) {
+		signed long r;
+		r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+		if (r < 0)
+			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+	}
+
+	fence_get(fence);
+
+	spin_lock(&ctx->ring_lock);
+	cring->fences[idx] = fence;
+	cring->sequence++;
+	spin_unlock(&ctx->ring_lock);
+
+	fence_put(other);
+
+	return seq;
+}
+
+struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+				   struct amdgpu_ring *ring, uint64_t seq)
+{
+	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+	struct fence *fence;
+
+	spin_lock(&ctx->ring_lock);
+	if (seq >= cring->sequence) {
+		spin_unlock(&ctx->ring_lock);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) {
+		spin_unlock(&ctx->ring_lock);
+		return NULL;
+	}
+
+	fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]);
+	spin_unlock(&ctx->ring_lock);
+
+	return fence;
+}
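Note on the v2 locking: fences[] and sequence are only touched under ring_lock, while the potentially long fence_wait_timeout() on the slot's previous occupant happens before the lock is taken, so the spinlock is never held across a sleep. That wait before reuse is also what makes the NULL return of amdgpu_ctx_get_fence() safe to treat as "already signaled". A hypothetical helper, not in the patch, that spells out which earlier submission gets replaced:

	/* adding sequence number @seq reuses slot seq % AMDGPU_CTX_MAX_CS_PENDING,
	 * which was last used 16 submissions earlier; e.g. seq 37 reuses slot 5
	 * and first waits for the fence of seq 21 */
	static uint64_t amdgpu_ctx_replaced_seq(uint64_t seq)
	{
		return seq >= AMDGPU_CTX_MAX_CS_PENDING ?
		       seq - AMDGPU_CTX_MAX_CS_PENDING : 0;
	}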
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 	/* wrap the last IB with fence */
 	if (ib->user) {
 		uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
+		ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
+							  &ib->fence->base);
 		addr += ib->user->offset;
-		amdgpu_ring_emit_fence(ring, addr, ib->fence->seq,
+		amdgpu_ring_emit_fence(ring, addr, ib->user->sequence,
 				       AMDGPU_FENCE_FLAG_64BIT);
 	}
 
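Note: the user fence write-back now carries the same per-context sequence number: the ring writes ib->user->sequence as a 64-bit value at the user BO address plus offset, and the CS ioctl returns that number as the handle. A hedged user-space sketch, not part of the patch, of checking completion straight from the mapped write-back buffer:

	/* assumes uf_cpu_addr is a CPU mapping of the user fence BO at the offset
	 * passed in the CS chunk; sequence numbers grow monotonically per ring */
	static bool user_fence_signaled(const volatile uint64_t *uf_cpu_addr, uint64_t cs_handle)
	{
		return *uf_cpu_addr >= cs_handle;
	}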