drm/amdgpu: add proper job alloc/free functions
And use them in the CS instead of allocating IBs and jobs separately.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4acabfe379
commit 50838c8cc4
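The amdgpu_job_alloc() added by this patch sizes one allocation to hold the job header plus a trailing array of IBs, so a single free later covers both. A minimal userspace sketch of that pattern, using simplified stand-in types rather than the driver's real structs:

/* Stand-ins for struct amdgpu_ib / struct amdgpu_job, for illustration only. */
#include <stdio.h>
#include <stdlib.h>

struct ib { unsigned length_dw; };

struct job {
        unsigned num_ibs;
        struct ib *ibs;         /* points into the same allocation */
};

static int job_alloc(unsigned num_ibs, struct job **job)
{
        size_t size = sizeof(struct job) + sizeof(struct ib) * num_ibs;

        if (num_ibs == 0)
                return -1;

        *job = calloc(1, size); /* zeroed, like kzalloc() in the patch */
        if (!*job)
                return -1;

        /* the IB array lives directly behind the job header */
        (*job)->ibs = (struct ib *)&(*job)[1];
        (*job)->num_ibs = num_ibs;
        return 0;
}

int main(void)
{
        struct job *job;

        if (job_alloc(4, &job) == 0) {
                printf("job with %u IBs from one allocation\n", job->num_ibs);
                free(job);      /* releases the header and all IBs at once */
        }
        return 0;
}

One allocate/free pair then covers the job and all of its IBs, which is what lets the patch drop the separate kcalloc() of p->ibs in amdgpu_cs_parser_init() below.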
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -796,6 +796,9 @@ enum amdgpu_ring_type {
 extern struct amd_sched_backend_ops amdgpu_sched_ops;
 
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+                     struct amdgpu_job **job);
+void amdgpu_job_free(struct amdgpu_job *job);
 int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
                                          struct amdgpu_ring *ring,
                                          struct amdgpu_ib *ibs,
@@ -1216,9 +1219,8 @@ struct amdgpu_cs_parser {
         unsigned nchunks;
         struct amdgpu_cs_chunk *chunks;
 
-        /* indirect buffers */
-        uint32_t num_ibs;
-        struct amdgpu_ib *ibs;
+        /* scheduler job object */
+        struct amdgpu_job *job;
 
         /* buffer objects */
         struct ww_acquire_ctx ticket;
@@ -1249,14 +1251,14 @@ struct amdgpu_job {
 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
                                       uint32_t ib_idx, int idx)
 {
-        return p->ibs[ib_idx].ptr[idx];
+        return p->job->ibs[ib_idx].ptr[idx];
 }
 
 static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
                                        uint32_t ib_idx, int idx,
                                        uint32_t value)
 {
-        p->ibs[ib_idx].ptr[idx] = value;
+        p->job->ibs[ib_idx].ptr[idx] = value;
 }
 
 /*
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -121,7 +121,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
         uint64_t *chunk_array_user;
         uint64_t *chunk_array;
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-        unsigned size;
+        unsigned size, num_ibs = 0;
         int i;
         int ret;
 
@@ -186,7 +186,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
                 switch (p->chunks[i].chunk_id) {
                 case AMDGPU_CHUNK_ID_IB:
-                        p->num_ibs++;
+                        ++num_ibs;
                         break;
 
                 case AMDGPU_CHUNK_ID_FENCE:
@@ -211,16 +211,9 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
                 }
         }
 
-        if (p->num_ibs == 0) {
-                ret = -EINVAL;
+        ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
+        if (ret)
                 goto free_all_kdata;
-        }
-
-        p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
-        if (!p->ibs) {
-                ret = -ENOMEM;
-                goto free_all_kdata;
-        }
 
         kfree(chunk_array);
         return 0;
@@ -414,7 +407,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 
         list_for_each_entry(e, &p->validated, tv.head) {
                 struct reservation_object *resv = e->robj->tbo.resv;
-                r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp);
+                r = amdgpu_sync_resv(p->adev, &p->job->ibs[0].sync, resv, p->filp);
 
                 if (r)
                         return r;
@@ -477,10 +470,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
         for (i = 0; i < parser->nchunks; i++)
                 drm_free_large(parser->chunks[i].kdata);
         kfree(parser->chunks);
-        if (parser->ibs)
-                for (i = 0; i < parser->num_ibs; i++)
-                        amdgpu_ib_free(parser->adev, &parser->ibs[i]);
-        kfree(parser->ibs);
+        if (parser->job)
+                amdgpu_job_free(parser->job);
         amdgpu_bo_unref(&parser->uf.bo);
         amdgpu_bo_unref(&parser->uf_entry.robj);
 }
@@ -497,7 +488,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
         if (r)
                 return r;
 
-        r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
+        r = amdgpu_sync_fence(adev, &p->job->ibs[0].sync, vm->page_directory_fence);
         if (r)
                 return r;
 
@@ -523,14 +514,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
                                 return r;
 
                         f = bo_va->last_pt_update;
-                        r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
+                        r = amdgpu_sync_fence(adev, &p->job->ibs[0].sync, f);
                         if (r)
                                 return r;
                 }
 
         }
 
-        r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync);
+        r = amdgpu_vm_clear_invalids(adev, vm, &p->job->ibs[0].sync);
 
         if (amdgpu_vm_debug && p->bo_list) {
                 /* Invalidate all BOs to test for userspace bugs */
@@ -556,8 +547,8 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
         int i, r;
 
         /* Only for UVD/VCE VM emulation */
-        for (i = 0; i < parser->num_ibs; i++) {
-                ring = parser->ibs[i].ring;
+        for (i = 0; i < parser->job->num_ibs; i++) {
+                ring = parser->job->ibs[i].ring;
                 if (ring->funcs->parse_cs) {
                         r = amdgpu_ring_parse_cs(ring, parser, i);
                         if (r)
@@ -590,14 +581,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
         int i, j;
         int r;
 
-        for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) {
+        for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
                 struct amdgpu_cs_chunk *chunk;
                 struct amdgpu_ib *ib;
                 struct drm_amdgpu_cs_chunk_ib *chunk_ib;
                 struct amdgpu_ring *ring;
 
                 chunk = &parser->chunks[i];
-                ib = &parser->ibs[j];
+                ib = &parser->job->ibs[j];
                 chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 
                 if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
@@ -666,7 +657,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
                 struct amdgpu_bo *gds = parser->bo_list->gds_obj;
                 struct amdgpu_bo *gws = parser->bo_list->gws_obj;
                 struct amdgpu_bo *oa = parser->bo_list->oa_obj;
-                struct amdgpu_ib *ib = &parser->ibs[0];
+                struct amdgpu_ib *ib = &parser->job->ibs[0];
 
                 if (gds) {
                         ib->gds_base = amdgpu_bo_gpu_offset(gds);
@@ -683,7 +674,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
         }
         /* wrap the last IB with user fence */
         if (parser->uf.bo) {
-                struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];
+                struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];
 
                 /* UVD & VCE fw doesn't support user fences */
                 if (ib->ring->type == AMDGPU_RING_TYPE_UVD ||
@@ -704,7 +695,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
         int i, j, r;
 
         /* Add dependencies to first IB */
-        ib = &p->ibs[0];
+        ib = &p->job->ibs[0];
         for (i = 0; i < p->nchunks; ++i) {
                 struct drm_amdgpu_cs_chunk_dep *deps;
                 struct amdgpu_cs_chunk *chunk;
@@ -756,26 +747,19 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 
 static int amdgpu_cs_free_job(struct amdgpu_job *job)
 {
-        int i;
-        if (job->ibs)
-                for (i = 0; i < job->num_ibs; i++)
-                        amdgpu_ib_free(job->adev, &job->ibs[i]);
-        kfree(job->ibs);
-        if (job->uf.bo)
-                amdgpu_bo_unref(&job->uf.bo);
+        amdgpu_job_free(job);
         return 0;
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                             union drm_amdgpu_cs *cs)
 {
-        struct amdgpu_ring * ring = p->ibs->ring;
+        struct amdgpu_ring * ring = p->job->ibs->ring;
         struct amd_sched_fence *fence;
         struct amdgpu_job *job;
 
-        job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
-        if (!job)
-                return -ENOMEM;
+        job = p->job;
+        p->job = NULL;
 
         job->base.sched = &ring->sched;
         job->base.s_entity = &p->ctx->rings[ring->idx].entity;
@@ -783,11 +767,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
         job->owner = p->filp;
         job->free_job = amdgpu_cs_free_job;
 
-        job->ibs = p->ibs;
-        job->num_ibs = p->num_ibs;
-        p->ibs = NULL;
-        p->num_ibs = 0;
-
         if (job->ibs[job->num_ibs - 1].user) {
                 job->uf = p->uf;
                 job->ibs[job->num_ibs - 1].user = &job->uf;
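The hunks above transfer ownership instead of copying fields: amdgpu_cs_submit() takes p->job and clears the parser's pointer, so amdgpu_cs_parser_fini() sees NULL and will not free a job the scheduler now owns. A sketch of the idiom, reusing the stand-in types from the earlier example:

struct job;                             /* opaque stand-in */
struct parser { struct job *job; };

static struct job *take_job(struct parser *p)
{
        struct job *job = p->job;

        p->job = NULL;                  /* ownership moves to the caller */
        return job;
}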
@@ -854,7 +833,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
         if (r)
                 goto out;
 
-        for (i = 0; i < parser.num_ibs; i++)
+        for (i = 0; i < parser.job->num_ibs; i++)
                 trace_amdgpu_cs(&parser, i);
 
         r = amdgpu_cs_ib_vm_chunk(adev, &parser);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -28,6 +28,39 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+                     struct amdgpu_job **job)
+{
+        size_t size = sizeof(struct amdgpu_job);
+
+        if (num_ibs == 0)
+                return -EINVAL;
+
+        size += sizeof(struct amdgpu_ib) * num_ibs;
+
+        *job = kzalloc(size, GFP_KERNEL);
+        if (!*job)
+                return -ENOMEM;
+
+        (*job)->adev = adev;
+        (*job)->ibs = (void *)&(*job)[1];
+        (*job)->num_ibs = num_ibs;
+        (*job)->free_job = NULL;
+
+        return 0;
+}
+
+void amdgpu_job_free(struct amdgpu_job *job)
+{
+        unsigned i;
+
+        for (i = 0; i < job->num_ibs; ++i)
+                amdgpu_ib_free(job->adev, &job->ibs[i]);
+
+        amdgpu_bo_unref(&job->uf.bo);
+        /* TODO: Free the job structure here as well */
+}
+
 static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
 {
         struct amdgpu_job *job = to_amdgpu_job(sched_job);
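For reference, a hypothetical caller of the two new functions (not part of the patch; fill_ibs() is an assumed helper) would pair them like this, and per the TODO above would still kfree() the job itself:

static int submit_two_ibs(struct amdgpu_device *adev)
{
        struct amdgpu_job *job;
        int r;

        r = amdgpu_job_alloc(adev, 2, &job);    /* -EINVAL for 0 IBs, -ENOMEM on failure */
        if (r)
                return r;

        r = fill_ibs(job);                      /* assumed helper, fills job->ibs[0..1] */
        if (r) {
                amdgpu_job_free(job);           /* releases the IBs and the uf BO */
                kfree(job);                     /* not yet done inside, see the TODO */
                return r;
        }
        return 0;
}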
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -38,10 +38,10 @@ TRACE_EVENT(amdgpu_cs,
 
     TP_fast_assign(
                    __entry->bo_list = p->bo_list;
-                   __entry->ring = p->ibs[i].ring->idx;
-                   __entry->dw = p->ibs[i].length_dw;
+                   __entry->ring = p->job->ibs[i].ring->idx;
+                   __entry->dw = p->job->ibs[i].length_dw;
                    __entry->fences = amdgpu_fence_count_emitted(
-                        p->ibs[i].ring);
+                        p->job->ibs[i].ring);
                    ),
     TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
               __entry->bo_list, __entry->ring, __entry->dw,
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -702,7 +702,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
                              int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
 {
-        struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
+        struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
         int i, r;
 
         ctx->idx++;
@@ -748,7 +748,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
 static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
                                  int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
 {
-        struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
+        struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
         int r;
 
         for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
@@ -790,7 +790,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
                 [0x00000003] = 2048,
                 [0x00000004] = 0xFFFFFFFF,
         };
-        struct amdgpu_ib *ib = &parser->ibs[ib_idx];
+        struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
         int r;
 
         if (ib->length_dw % 16) {
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -603,7 +603,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
  */
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 {
-        struct amdgpu_ib *ib = &p->ibs[ib_idx];
+        struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
         unsigned fb_idx = 0, bs_idx = 0;
         int session_idx = -1;
         bool destroyed = false;