@@ -64,11 +64,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
         return 0;
 }
 
-static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
-                           struct drm_amdgpu_cs_chunk_ib *chunk_ib,
-                           unsigned int *num_ibs)
+static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
+                             struct drm_amdgpu_cs_chunk_ib *chunk_ib)
 {
         struct drm_sched_entity *entity;
+        unsigned int i;
         int r;
 
         r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
@@ -77,17 +77,38 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
         if (r)
                 return r;
 
-        /* Abort if there is no run queue associated with this entity.
-         * Possibly because of disabled HW IP*/
+        /*
+         * Abort if there is no run queue associated with this entity.
+         * Possibly because of disabled HW IP.
+         */
         if (entity->rq == NULL)
                 return -EINVAL;
 
-        /* Currently we don't support submitting to multiple entities */
-        if (p->entity && p->entity != entity)
+        /* Check if we can add this IB to some existing job */
+        for (i = 0; i < p->gang_size; ++i)
+                if (p->entities[i] == entity)
+                        return i;
+
+        /* If not increase the gang size if possible */
+        if (i == AMDGPU_CS_GANG_SIZE)
                 return -EINVAL;
 
-        p->entity = entity;
-        ++(*num_ibs);
+        p->entities[i] = entity;
+        p->gang_size = i + 1;
+        return i;
+}
+
+static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
+                           struct drm_amdgpu_cs_chunk_ib *chunk_ib,
+                           unsigned int *num_ibs)
+{
+        int r;
+
+        r = amdgpu_cs_job_idx(p, chunk_ib);
+        if (r < 0)
+                return r;
+
+        ++(num_ibs[r]);
         return 0;
 }
 
@@ -161,11 +182,12 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
                            union drm_amdgpu_cs *cs)
 {
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+        unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
         struct amdgpu_vm *vm = &fpriv->vm;
         uint64_t *chunk_array_user;
         uint64_t *chunk_array;
-        unsigned size, num_ibs = 0;
         uint32_t uf_offset = 0;
+        unsigned int size;
         int ret;
         int i;
 
@@ -228,7 +250,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
                         if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
                                 goto free_partial_kdata;
 
-                        ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, &num_ibs);
+                        ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
                         if (ret)
                                 goto free_partial_kdata;
                         break;
@@ -265,21 +287,28 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
                 }
         }
 
-        ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
-        if (ret)
-                goto free_all_kdata;
+        if (!p->gang_size)
+                return -EINVAL;
 
-        ret = drm_sched_job_init(&p->job->base, p->entity, &fpriv->vm);
-        if (ret)
-                goto free_all_kdata;
+        for (i = 0; i < p->gang_size; ++i) {
+                ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm);
+                if (ret)
+                        goto free_all_kdata;
+
+                ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i],
+                                         &fpriv->vm);
+                if (ret)
+                        goto free_all_kdata;
+        }
+        p->gang_leader = p->jobs[p->gang_size - 1];
 
-        if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+        if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
                 ret = -ECANCELED;
                 goto free_all_kdata;
         }
 
         if (p->uf_entry.tv.bo)
-                p->job->uf_addr = uf_offset;
+                p->gang_leader->uf_addr = uf_offset;
         kvfree(chunk_array);
 
         /* Use this opportunity to fill in task info for the vm */
@@ -303,17 +332,25 @@ free_chunk:
 
 static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
                            struct amdgpu_cs_chunk *chunk,
-                           unsigned int *num_ibs,
                            unsigned int *ce_preempt,
                            unsigned int *de_preempt)
 {
         struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-        struct amdgpu_ring *ring = amdgpu_job_ring(p->job);
-        struct amdgpu_ib *ib = &p->job->ibs[*num_ibs];
         struct amdgpu_vm *vm = &fpriv->vm;
+        struct amdgpu_ring *ring;
+        struct amdgpu_job *job;
+        struct amdgpu_ib *ib;
         int r;
 
+        r = amdgpu_cs_job_idx(p, chunk_ib);
+        if (r < 0)
+                return r;
+
+        job = p->jobs[r];
+        ring = amdgpu_job_ring(job);
+        ib = &job->ibs[job->num_ibs++];
+
         /* MM engine doesn't support user fences */
         if (p->uf_entry.tv.bo && ring->funcs->no_user_fence)
                 return -EINVAL;
@@ -332,7 +369,7 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
         }
 
         if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
-                p->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+                job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
 
         r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
                           chunk_ib->ib_bytes : 0,
@@ -345,8 +382,6 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
         ib->gpu_addr = chunk_ib->va_start;
         ib->length_dw = chunk_ib->ib_bytes / 4;
         ib->flags = chunk_ib->flags;
-
-        (*num_ibs)++;
         return 0;
 }
 
@@ -395,7 +430,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
                         dma_fence_put(old);
                 }
 
-                r = amdgpu_sync_fence(&p->job->sync, fence);
+                r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
                 dma_fence_put(fence);
                 if (r)
                         return r;
@@ -417,7 +452,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
                 return r;
         }
 
-        r = amdgpu_sync_fence(&p->job->sync, fence);
+        r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
         dma_fence_put(fence);
 
         return r;
@@ -540,7 +575,7 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
 
 static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 {
-        unsigned int num_ibs = 0, ce_preempt = 0, de_preempt = 0;
+        unsigned int ce_preempt = 0, de_preempt = 0;
         int i, r;
 
         for (i = 0; i < p->nchunks; ++i) {
@@ -550,8 +585,7 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 
                 switch (chunk->chunk_id) {
                 case AMDGPU_CHUNK_ID_IB:
-                        r = amdgpu_cs_p2_ib(p, chunk, &num_ibs,
-                                            &ce_preempt, &de_preempt);
+                        r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
                         if (r)
                                 return r;
                         break;
@@ -822,6 +856,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
         struct amdgpu_vm *vm = &fpriv->vm;
         struct amdgpu_bo_list_entry *e;
         struct list_head duplicates;
+        unsigned int i;
         int r;
 
         INIT_LIST_HEAD(&p->validated);
@@ -905,16 +940,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                 e->bo_va = amdgpu_vm_bo_find(vm, bo);
         }
 
-        /* Move fence waiting after getting reservation lock of
-         * PD root. Then there is no need on a ctx mutex lock.
-         */
-        r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
-        if (unlikely(r != 0)) {
-                if (r != -ERESTARTSYS)
-                        DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
-                goto error_validate;
-        }
-
         amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
                                           &p->bytes_moved_vis_threshold);
         p->bytes_moved = 0;
@@ -942,13 +967,16 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                 if (r)
                         goto error_validate;
 
-                p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
+                p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(uf);
         }
 
         amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
                                      p->bytes_moved_vis);
-        amdgpu_job_set_resources(p->job, p->bo_list->gds_obj,
-                                 p->bo_list->gws_obj, p->bo_list->oa_obj);
+
+        for (i = 0; i < p->gang_size; ++i)
+                amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
+                                         p->bo_list->gws_obj,
+                                         p->bo_list->oa_obj);
         return 0;
 
 error_validate:
@@ -967,20 +995,24 @@ out_free_user_pages:
         return r;
 }
 
-static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
 {
-        int i;
+        int i, j;
 
         if (!trace_amdgpu_cs_enabled())
                 return;
 
-        for (i = 0; i < parser->job->num_ibs; i++)
-                trace_amdgpu_cs(parser, i);
+        for (i = 0; i < p->gang_size; ++i) {
+                struct amdgpu_job *job = p->jobs[i];
+
+                for (j = 0; j < job->num_ibs; ++j)
+                        trace_amdgpu_cs(p, job, &job->ibs[j]);
+        }
 }
 
-static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
+                               struct amdgpu_job *job)
 {
-        struct amdgpu_job *job = p->job;
         struct amdgpu_ring *ring = amdgpu_job_ring(job);
         unsigned int i;
         int r;
@@ -1021,12 +1053,12 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p)
                         memcpy(ib->ptr, kptr, ib->length_dw * 4);
                         amdgpu_bo_kunmap(aobj);
 
-                        r = amdgpu_ring_parse_cs(ring, p, p->job, ib);
+                        r = amdgpu_ring_parse_cs(ring, p, job, ib);
                         if (r)
                                 return r;
                 } else {
                         ib->ptr = (uint32_t *)kptr;
-                        r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib);
+                        r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
                         amdgpu_bo_kunmap(aobj);
                         if (r)
                                 return r;
@@ -1036,19 +1068,31 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p)
         return 0;
 }
 
+static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
+{
+        unsigned int i;
+        int r;
+
+        for (i = 0; i < p->gang_size; ++i) {
+                r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
+                if (r)
+                        return r;
+        }
+        return 0;
+}
+
 static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+        struct amdgpu_job *job = p->gang_leader;
         struct amdgpu_device *adev = p->adev;
         struct amdgpu_vm *vm = &fpriv->vm;
         struct amdgpu_bo_list_entry *e;
         struct amdgpu_bo_va *bo_va;
         struct amdgpu_bo *bo;
+        unsigned int i;
         int r;
 
-        if (!p->job->vm)
-                return 0;
-
         r = amdgpu_vm_clear_freed(adev, vm, NULL);
         if (r)
                 return r;
@@ -1057,7 +1101,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
         if (r)
                 return r;
 
-        r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+        r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update);
         if (r)
                 return r;
 
@@ -1068,7 +1112,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
                 if (r)
                         return r;
 
-                r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
+                r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
                 if (r)
                         return r;
         }
@@ -1087,7 +1131,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
                 if (r)
                         return r;
 
-                r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
+                r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
                 if (r)
                         return r;
         }
@@ -1100,11 +1144,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
         if (r)
                 return r;
 
-        r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
+        r = amdgpu_sync_fence(&job->sync, vm->last_update);
         if (r)
                 return r;
 
-        p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+        for (i = 0; i < p->gang_size; ++i) {
+                job = p->jobs[i];
+
+                if (!job->vm)
+                        continue;
+
+                job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+        }
 
         if (amdgpu_vm_debug) {
                 /* Invalidate all BOs to test for userspace bugs */
@@ -1125,7 +1176,9 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 {
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+        struct amdgpu_job *leader = p->gang_leader;
         struct amdgpu_bo_list_entry *e;
+        unsigned int i;
         int r;
 
         list_for_each_entry(e, &p->validated, tv.head) {
@@ -1135,12 +1188,23 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 
                 sync_mode = amdgpu_bo_explicit_sync(bo) ?
                         AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
-                r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
+                r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode,
                                      &fpriv->vm);
                 if (r)
                         return r;
         }
-        return 0;
+
+        for (i = 0; i < p->gang_size - 1; ++i) {
+                r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
+                if (r)
+                        return r;
+        }
+
+        r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
+        if (r && r != -ERESTARTSYS)
+                DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
+
+        return r;
 }
 
 static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@ -1164,16 +1228,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                             union drm_amdgpu_cs *cs)
 {
         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-        struct drm_sched_entity *entity = p->entity;
+        struct amdgpu_job *leader = p->gang_leader;
         struct amdgpu_bo_list_entry *e;
-        struct amdgpu_job *job;
+        unsigned int i;
         uint64_t seq;
         int r;
 
-        job = p->job;
-        p->job = NULL;
+        for (i = 0; i < p->gang_size; ++i)
+                drm_sched_job_arm(&p->jobs[i]->base);
 
-        drm_sched_job_arm(&job->base);
+        for (i = 0; i < (p->gang_size - 1); ++i) {
+                struct dma_fence *fence;
+
+                fence = &p->jobs[i]->base.s_fence->scheduled;
+                r = amdgpu_sync_fence(&leader->sync, fence);
+                if (r)
+                        goto error_cleanup;
+        }
+
+        if (p->gang_size > 1) {
+                for (i = 0; i < p->gang_size; ++i)
+                        amdgpu_job_set_gang_leader(p->jobs[i], leader);
+        }
 
         /* No memory allocation is allowed while holding the notifier lock.
          * The lock is held until amdgpu_cs_submit is finished and fence is
@@ -1191,45 +1267,57 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
         }
         if (r) {
                 r = -EAGAIN;
-                goto error_abort;
+                goto error_unlock;
         }
 
-        p->fence = dma_fence_get(&job->base.s_fence->finished);
+        p->fence = dma_fence_get(&leader->base.s_fence->finished);
+        list_for_each_entry(e, &p->validated, tv.head) {
 
-        seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
+                /* Everybody except for the gang leader uses READ */
+                for (i = 0; i < (p->gang_size - 1); ++i) {
+                        dma_resv_add_fence(e->tv.bo->base.resv,
+                                           &p->jobs[i]->base.s_fence->finished,
+                                           DMA_RESV_USAGE_READ);
+                }
+
+                /* The gang leader is remembered as writer */
+                e->tv.num_shared = 0;
+        }
+
+        seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
+                                   p->fence);
         amdgpu_cs_post_dependencies(p);
 
-        if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+        if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
             !p->ctx->preamble_presented) {
-                job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+                leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
                 p->ctx->preamble_presented = true;
         }
 
         cs->out.handle = seq;
-        job->uf_sequence = seq;
+        leader->uf_sequence = seq;
 
-        amdgpu_job_free_resources(job);
-
-        trace_amdgpu_cs_ioctl(job);
         amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
-        drm_sched_entity_push_job(&job->base);
+        for (i = 0; i < p->gang_size; ++i) {
+                amdgpu_job_free_resources(p->jobs[i]);
+                trace_amdgpu_cs_ioctl(p->jobs[i]);
+                drm_sched_entity_push_job(&p->jobs[i]->base);
+                p->jobs[i] = NULL;
+        }
 
         amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
-        /* Make sure all BOs are remembered as writers */
-        amdgpu_bo_list_for_each_entry(e, p->bo_list)
-                e->tv.num_shared = 0;
-
         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
         mutex_unlock(&p->adev->notifier_lock);
         mutex_unlock(&p->bo_list->bo_list_mutex);
 
         return 0;
 
-error_abort:
-        drm_sched_job_cleanup(&job->base);
+error_unlock:
         mutex_unlock(&p->adev->notifier_lock);
-        amdgpu_job_free(job);
 
+error_cleanup:
+        for (i = 0; i < p->gang_size; ++i)
+                drm_sched_job_cleanup(&p->jobs[i]->base);
         return r;
 }
@@ -1246,17 +1334,18 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
         dma_fence_put(parser->fence);
 
-        if (parser->ctx) {
+        if (parser->ctx)
                 amdgpu_ctx_put(parser->ctx);
-        }
 
         if (parser->bo_list)
                 amdgpu_bo_list_put(parser->bo_list);
 
         for (i = 0; i < parser->nchunks; i++)
                 kvfree(parser->chunks[i].kdata);
         kvfree(parser->chunks);
-        if (parser->job)
-                amdgpu_job_free(parser->job);
+        for (i = 0; i < parser->gang_size; ++i) {
+                if (parser->jobs[i])
+                        amdgpu_job_free(parser->jobs[i]);
+        }
         if (parser->uf_entry.tv.bo) {
                 struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
 
@@ -1300,7 +1389,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                 goto error_fini;
         }
 
-        r = amdgpu_cs_patch_ibs(&parser);
+        r = amdgpu_cs_patch_jobs(&parser);
         if (r)
                 goto error_backoff;
 