Commit 1411796f authored by Mikko Perttunen, committed by Thierry Reding

gpu: host1x: Rewrite job opcode sequence

For new (Tegra186+) SoCs, use a new ('full-featured') job opcode
sequence that is compatible with virtualization. In particular,
the Host1x hardware in Tegra234 is more strict regarding the sequence,
requiring ACQUIRE_MLOCK-SETCLASS-SETSTREAMID opcodes to occur in
that sequence without gaps (except for SETPAYLOAD), so let's do it
properly in one go now.
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
parent 9abdd497
...@@ -47,10 +47,41 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, ...@@ -47,10 +47,41 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
} }
} }
static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold, static void submit_wait(struct host1x_job *job, u32 id, u32 threshold,
u32 next_class) u32 next_class)
{ {
#if HOST1X_HW >= 2 struct host1x_cdma *cdma = &job->channel->cdma;
#if HOST1X_HW >= 6
u32 stream_id;
/*
* If a memory context has been set, use it. Otherwise
* (if context isolation is disabled) use the engine's
* firmware stream ID.
*/
if (job->memory_context)
stream_id = job->memory_context->stream_id;
else
stream_id = job->engine_fallback_streamid;
host1x_cdma_push_wide(cdma,
host1x_opcode_setclass(
HOST1X_CLASS_HOST1X,
HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
/* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
BIT(0) | BIT(2)
),
threshold,
id,
HOST1X_OPCODE_NOP
);
host1x_cdma_push_wide(&job->channel->cdma,
host1x_opcode_setclass(job->class, 0, 0),
host1x_opcode_setpayload(stream_id),
host1x_opcode_setstreamid(job->engine_streamid_offset / 4),
HOST1X_OPCODE_NOP);
#elif HOST1X_HW >= 2
host1x_cdma_push_wide(cdma, host1x_cdma_push_wide(cdma,
host1x_opcode_setclass( host1x_opcode_setclass(
HOST1X_CLASS_HOST1X, HOST1X_CLASS_HOST1X,
...@@ -97,7 +128,7 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) ...@@ -97,7 +128,7 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
else else
threshold = cmd->wait.threshold; threshold = cmd->wait.threshold;
submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class); submit_wait(job, cmd->wait.id, threshold, cmd->wait.next_class);
} else { } else {
struct host1x_job_gather *g = &cmd->gather; struct host1x_job_gather *g = &cmd->gather;
...@@ -180,42 +211,70 @@ static void host1x_enable_gather_filter(struct host1x_channel *ch) ...@@ -180,42 +211,70 @@ static void host1x_enable_gather_filter(struct host1x_channel *ch)
#endif #endif
} }
static void host1x_channel_program_engine_streamid(struct host1x_job *job) static void channel_program_cdma(struct host1x_job *job)
{ {
struct host1x_cdma *cdma = &job->channel->cdma;
struct host1x_syncpt *sp = job->syncpt;
#if HOST1X_HW >= 6 #if HOST1X_HW >= 6
u32 fence; u32 fence;
if (!job->memory_context) /* Enter engine class with invalid stream ID. */
return; host1x_cdma_push_wide(cdma,
host1x_opcode_acquire_mlock(job->class),
host1x_opcode_setclass(job->class, 0, 0),
host1x_opcode_setpayload(0),
host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
fence = host1x_syncpt_incr_max(job->syncpt, 1); /* Before switching stream ID to real stream ID, ensure engine is idle. */
fence = host1x_syncpt_incr_max(sp, 1);
host1x_cdma_push(&job->channel->cdma,
host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
submit_wait(job, job->syncpt->id, fence, job->class);
/* First, increment a syncpoint on OP_DONE condition.. */ /* Submit work. */
job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
submit_gathers(job, job->syncpt_end - job->syncpt_incrs);
/* Before releasing MLOCK, ensure engine is idle again. */
fence = host1x_syncpt_incr_max(sp, 1);
host1x_cdma_push(&job->channel->cdma, host1x_cdma_push(&job->channel->cdma,
host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
HOST1X_UCLASS_INCR_SYNCPT_COND_F(1)); HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
submit_wait(job, job->syncpt->id, fence, job->class);
/* Wait for syncpoint to increment */ /* Release MLOCK. */
host1x_cdma_push(cdma,
HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class));
#else
if (job->serialize) {
/*
* Force serialization by inserting a host wait for the
* previous job to finish before this one can commence.
*/
host1x_cdma_push(cdma,
host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
host1x_uclass_wait_syncpt_r(), 1),
host1x_class_host_wait_syncpt(job->syncpt->id,
host1x_syncpt_read_max(sp)));
}
host1x_cdma_push(&job->channel->cdma, /* Synchronize base register to allow using it for relative waiting */
host1x_opcode_setclass(HOST1X_CLASS_HOST1X, if (sp->base)
host1x_uclass_wait_syncpt_r(), 1), synchronize_syncpt_base(job);
host1x_class_host_wait_syncpt(job->syncpt->id, fence));
/* /* add a setclass for modules that require it */
* Now that we know the engine is idle, return to class and if (job->class)
* change stream ID. host1x_cdma_push(cdma,
*/ host1x_opcode_setclass(job->class, 0, 0),
HOST1X_OPCODE_NOP);
host1x_cdma_push(&job->channel->cdma, job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
host1x_opcode_setclass(job->class, 0, 0),
HOST1X_OPCODE_NOP);
host1x_cdma_push(&job->channel->cdma, submit_gathers(job, job->syncpt_end - job->syncpt_incrs);
host1x_opcode_setpayload(job->memory_context->stream_id),
host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
#endif #endif
} }
...@@ -223,7 +282,6 @@ static int channel_submit(struct host1x_job *job) ...@@ -223,7 +282,6 @@ static int channel_submit(struct host1x_job *job)
{ {
struct host1x_channel *ch = job->channel; struct host1x_channel *ch = job->channel;
struct host1x_syncpt *sp = job->syncpt; struct host1x_syncpt *sp = job->syncpt;
u32 user_syncpt_incrs = job->syncpt_incrs;
u32 prev_max = 0; u32 prev_max = 0;
u32 syncval; u32 syncval;
int err; int err;
...@@ -251,6 +309,7 @@ static int channel_submit(struct host1x_job *job) ...@@ -251,6 +309,7 @@ static int channel_submit(struct host1x_job *job)
host1x_channel_set_streamid(ch); host1x_channel_set_streamid(ch);
host1x_enable_gather_filter(ch); host1x_enable_gather_filter(ch);
host1x_hw_syncpt_assign_to_channel(host, sp, ch);
/* begin a CDMA submit */ /* begin a CDMA submit */
err = host1x_cdma_begin(&ch->cdma, job); err = host1x_cdma_begin(&ch->cdma, job);
...@@ -259,40 +318,7 @@ static int channel_submit(struct host1x_job *job) ...@@ -259,40 +318,7 @@ static int channel_submit(struct host1x_job *job)
goto error; goto error;
} }
if (job->serialize) { channel_program_cdma(job);
/*
* Force serialization by inserting a host wait for the
* previous job to finish before this one can commence.
*/
host1x_cdma_push(&ch->cdma,
host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
host1x_uclass_wait_syncpt_r(), 1),
host1x_class_host_wait_syncpt(job->syncpt->id,
host1x_syncpt_read_max(sp)));
}
/* Synchronize base register to allow using it for relative waiting */
if (sp->base)
synchronize_syncpt_base(job);
host1x_hw_syncpt_assign_to_channel(host, sp, ch);
/* add a setclass for modules that require it */
if (job->class)
host1x_cdma_push(&ch->cdma,
host1x_opcode_setclass(job->class, 0, 0),
HOST1X_OPCODE_NOP);
/*
* Ensure engine DMA is idle and set new stream ID. May increment
* syncpt max.
*/
host1x_channel_program_engine_streamid(job);
syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
job->syncpt_end = syncval;
submit_gathers(job, syncval - user_syncpt_incrs);
/* end CDMA submit & stash pinned hMems into sync queue */ /* end CDMA submit & stash pinned hMems into sync queue */
host1x_cdma_end(&ch->cdma, job); host1x_cdma_end(&ch->cdma, job);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment