amdgpu_fence_emit 是在向硬件(或软件)提交任务之前, 为该任务标记 fence 的: 可以理解为, 一旦标记的 fence seq 返回, 任务就完成了。因此 amdgpu_ib_schedule 时会向 ring 里打一个 amdgpu_fence_emit 的包; 硬件完成任务后, 会在 gfx_v8.c 的 ISR 中进入 EOP handler, 去 signal 之前 gpu-sched 的 dma-fence。
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 82823d9a8ba88..5a442079cf62f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -937,6 +937,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
return amdgpu_cs_sync_rings(p);
}
+//这里最后要等之前对应 entity 上的旧任务做完: amdgpu_sched_jobs 参数是 32, 每个 entity 最多挂 32 个未完成的 job; 每个 job 的 ib 倒是可以多塞一些
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
struct amdgpu_cs_parser *parser)
{
@@ -996,6 +997,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return r;
}
+ // package ib addr to ring ,此处的bo是在bo_list上的, 并且4字节(dw)对齐
ib->gpu_addr = chunk_ib->va_start;
ib->length_dw = chunk_ib->ib_bytes / 4;
ib->flags = chunk_ib->flags;
@@ -1219,27 +1221,32 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
chunk = &p->chunks[i];
switch (chunk->chunk_id) {
+ //依赖其他cntx的dma-fence, 在gpu-schedule运行之前需要wait这些fence完成
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
return r;
break;
+ //依赖别人的syncobj
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
if (r)
return r;
break;
+ //signal 自己创建的 syncobj: 我们的 job 做完后, 会把 dma-fence 挂到该 syncobj 上
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
if (r)
return r;
break;
+ //依赖 timeline syncobj: 等待其指定 point 上的 fence 完成后才能跑本次提交
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
if (r)
return r;
break;
+ //异步地等到 job 完成之后, 在 timeline syncobj 的指定 point 上 signal 对应的 fence
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
if (r)
@@ -1307,6 +1314,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
}
job->owner = p->filp;
+
+ //保存 job 的 finished fence (硬件 fence 回来后由调度器 signal), wait_cs ioctl 等的也是这个
p->fence = dma_fence_get(&job->base.s_fence->finished);
amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 2cdaf3b2a7217..5e753a594de8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -100,12 +100,15 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
goto error_free_fences;
}
+ // 分配每一个entity的fence和seq
for (i = 0; i < num_entities; ++i) {
struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
entity->sequence = 1;
entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
}
+
+ // 上一个kcalloc只是申请, 现在是标记好 每个ip对应的entity资源
for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
ctx->entities[i] = ctx->entities[i - 1] +
amdgpu_ctx_num_entities[i - 1];
@@ -480,6 +483,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
dma_fence_get(fence);
spin_lock(&ctx->ring_lock);
+ //这个fence是 gpu-sched的提交fence ===> s_fence
centity->fences[idx] = fence;
centity->sequence++;
spin_unlock(&ctx->ring_lock);
@@ -537,6 +541,7 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
}
}
+//提交任务之前, 先等之前这个entity上对应这个job的任务干完再提交, 不然就覆盖啦, 所以后边有 BUG_ON
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
struct drm_sched_entity *entity)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7a6c837c0a85f..227487f086525 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1800,6 +1800,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r) {
DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
goto init_failed;
+
}
adev->ip_blocks[i].status.hw = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index f9bef3154b998..630d3e935b9c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -191,6 +191,7 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
return adev->gfx.mec.num_mec > 1;
}
+//mec -> pipe -> queue
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
int i, queue, pipe, mec;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index e5c83e164d82a..8214c7e8b2c39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -319,7 +319,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
&ring->gpu_addr,
- (void **)&ring->ring);
+ (void **)&ring->ring); //知道cpu视角的ring, 在ib_schedule时填数据
if (r) {
dev_err(adev->dev, "(%d) ring create failed\n", r);
return r;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index f39b97ed4ade4..ebc542119fa02 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -129,6 +129,8 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq)
spin_lock(&rq->lock);
+//这里是如果 current_entity 刚被调度过, 那就从它往后轮转, 在 rq 里取下一个 ready 的 entity
+//特殊的, 如果一个 rq 只有一个 entity, 那就一直用这个. 比如 gfx 只有一个 ring, 只有一个 cntx, 优先级只有一个(rq 唯一), 那就独享了
entity = rq->current_entity;
if (entity) {
list_for_each_entry_continue(entity, &rq->entities, list) {
@@ -140,6 +142,7 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq)
}
}
+//这里是轮转没找到时, 从头再扫一遍 rq, 取第一个 ready 的 entity
list_for_each_entry(entity, &rq->entities, list) {
if (drm_sched_entity_is_ready(entity)) {
@@ -732,6 +735,7 @@ static int drm_sched_main(void *param)
if (!IS_ERR_OR_NULL(fence)) {
s_fence->parent = dma_fence_get(fence);
+ //job完成的fence. 通知调度器job完成, 然后调度器signal finished fence
r = dma_fence_add_callback(fence, &sched_job->cb,
drm_sched_process_job);
if (r == -ENOENT)