Commit de5825be authored by Chris Wilson

drm/i915: Serialise with engine-pm around requests on the kernel_context

As the engine->kernel_context is used within the engine-pm barrier, we
have to be careful when emitting requests outside of the barrier, as the
strict timeline locking rules do not apply. Instead, we must ensure the
engine_park() cannot be entered as we build the request, which is
simplest by taking an explicit engine-pm wakeref around the request
construction.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191125105858.1718307-1-chris@chris-wilson.co.uk
parent da0ef77e
......@@ -70,6 +70,7 @@
#include <drm/i915_drm.h>
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"
......@@ -1265,7 +1266,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
if (!intel_context_is_pinned(ce))
return 0;
rq = i915_request_create(ce->engine->kernel_context);
rq = intel_engine_create_kernel_request(ce->engine);
if (IS_ERR(rq))
return PTR_ERR(rq);
......
......@@ -24,6 +24,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
prandom_seed_state(&prng, i915_selftest.random_seed);
intel_engine_pm_get(engine);
do {
const u32 max_block_size = S16_MAX * PAGE_SIZE;
u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
......@@ -99,6 +100,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
err_flush:
if (err == -ENOMEM)
err = 0;
intel_engine_pm_put(engine);
return err;
}
......
......@@ -6,6 +6,7 @@
#include <linux/prime_numbers.h>
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"
......@@ -200,7 +201,7 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
if (IS_ERR(vma))
return PTR_ERR(vma);
rq = i915_request_create(ctx->engine->kernel_context);
rq = intel_engine_create_kernel_request(ctx->engine);
if (IS_ERR(rq)) {
i915_vma_unpin(vma);
return PTR_ERR(rq);
......
......@@ -7,6 +7,7 @@
#include <linux/prime_numbers.h>
#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
......@@ -1190,9 +1191,11 @@ __sseu_test(const char *name,
struct igt_spinner *spin = NULL;
int ret;
intel_engine_pm_get(ce->engine);
ret = __sseu_prepare(name, flags, ce, &spin);
if (ret)
return ret;
goto out_pm;
ret = intel_context_reconfigure_sseu(ce, sseu);
if (ret)
......@@ -1207,6 +1210,8 @@ __sseu_test(const char *name,
igt_spinner_fini(spin);
kfree(spin);
}
out_pm:
intel_engine_pm_put(ce->engine);
return ret;
}
......
......@@ -6,6 +6,7 @@
#include <linux/prime_numbers.h>
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "huge_gem_object.h"
......@@ -536,7 +537,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
if (err)
return err;
rq = i915_request_create(engine->kernel_context);
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
i915_vma_unpin(vma);
return PTR_ERR(rq);
......
......@@ -41,6 +41,7 @@ static int __perf_fill_blt(struct drm_i915_gem_object *obj)
if (!engine)
return 0;
intel_engine_pm_get(engine);
for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
struct intel_context *ce = engine->kernel_context;
ktime_t t0, t1;
......@@ -49,17 +50,20 @@ static int __perf_fill_blt(struct drm_i915_gem_object *obj)
err = i915_gem_object_fill_blt(obj, ce, 0);
if (err)
return err;
break;
err = i915_gem_object_wait(obj,
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT);
if (err)
return err;
break;
t1 = ktime_get();
t[pass] = ktime_sub(t1, t0);
}
intel_engine_pm_put(engine);
if (err)
return err;
sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
pr_info("%s: blt %zd KiB fill: %lld MiB/s\n",
......@@ -109,6 +113,7 @@ static int __perf_copy_blt(struct drm_i915_gem_object *src,
struct intel_engine_cs *engine;
ktime_t t[5];
int pass;
int err = 0;
engine = intel_engine_lookup_user(i915,
I915_ENGINE_CLASS_COPY,
......@@ -116,26 +121,29 @@ static int __perf_copy_blt(struct drm_i915_gem_object *src,
if (!engine)
return 0;
intel_engine_pm_get(engine);
for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
struct intel_context *ce = engine->kernel_context;
ktime_t t0, t1;
int err;
t0 = ktime_get();
err = i915_gem_object_copy_blt(src, dst, ce);
if (err)
return err;
break;
err = i915_gem_object_wait(dst,
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT);
if (err)
return err;
break;
t1 = ktime_get();
t[pass] = ktime_sub(t1, t0);
}
intel_engine_pm_put(engine);
if (err)
return err;
sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
pr_info("%s: blt %zd KiB copy: %lld MiB/s\n",
......
......@@ -215,18 +215,26 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
/*
 * intel_engine_flush_barriers - emit a pulse to flush pending barrier tasks
 * @engine: the engine whose barrier_tasks should be flushed
 *
 * Submit an idle pulse request on @engine's kernel_context so that any
 * pending barrier tasks are carried through the engine. The engine-pm
 * wakeref is held across the request construction to serialise with the
 * kernel_context's use inside the engine-pm barrier (see the commit note:
 * the strict timeline locking rules do not apply there).
 *
 * Returns 0 on success, or when there is nothing to flush or the engine
 * is not awake; otherwise a negative error code from request creation.
 */
int intel_engine_flush_barriers(struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int err = 0;

	/* Nothing queued: no pulse required. */
	if (llist_empty(&engine->barrier_tasks))
		return 0;

	/*
	 * Only take the wakeref if the engine is already awake; if it is
	 * parked (or parking), we must not wake it just to flush barriers.
	 */
	if (!intel_engine_pm_get_if_awake(engine))
		return 0;

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_rpm;
	}

	idle_pulse(engine, rq);
	i915_request_add(rq);

out_rpm:
	/* Release the wakeref taken above on every exit path. */
	intel_engine_pm_put(engine);
	return err;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
......
......@@ -7,6 +7,7 @@
#ifndef INTEL_ENGINE_PM_H
#define INTEL_ENGINE_PM_H
#include "i915_request.h"
#include "intel_engine_types.h"
#include "intel_wakeref.h"
......@@ -41,6 +42,26 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
intel_wakeref_unlock_wait(&engine->wakeref);
}
static inline struct i915_request *
intel_engine_create_kernel_request(struct intel_engine_cs *engine)
{
	struct i915_request *request;

	/*
	 * The engine->kernel_context is special: it is used inside the
	 * engine-pm barrier (see __engine_park()), circumventing the usual
	 * mutexes and relying on the engine-pm barrier for serialisation.
	 * Any request built on the kernel_context outside of that barrier
	 * must therefore take its own engine wakeref for the duration of
	 * the request construction to serialise with the use inside.
	 */
	intel_engine_pm_get(engine);
	request = i915_request_create(engine->kernel_context);
	intel_engine_pm_put(engine);

	return request;
}
void intel_engine_init__pm(struct intel_engine_cs *engine);
#endif /* INTEL_ENGINE_PM_H */
......@@ -6,6 +6,7 @@
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
......@@ -1582,7 +1583,9 @@ static int engine_wa_list_verify(struct intel_context *ce,
if (IS_ERR(vma))
return PTR_ERR(vma);
intel_engine_pm_get(ce->engine);
rq = intel_context_create_request(ce);
intel_engine_pm_put(ce->engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_vma;
......
......@@ -121,7 +121,7 @@ static int __live_context_size(struct intel_engine_cs *engine,
goto err_unpin;
/* Force the context switch */
rq = i915_request_create(engine->kernel_context);
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
......
......@@ -132,14 +132,18 @@ static int perf_mi_bb_start(void *arg)
u32 cycles[COUNT];
int i;
intel_engine_pm_get(engine);
batch = create_empty_batch(ce);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
intel_engine_pm_put(engine);
break;
}
err = i915_vma_sync(batch);
if (err) {
intel_engine_pm_put(engine);
i915_vma_put(batch);
break;
}
......@@ -180,6 +184,7 @@ static int perf_mi_bb_start(void *arg)
cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
}
i915_vma_put(batch);
intel_engine_pm_put(engine);
if (err)
break;
......@@ -251,15 +256,19 @@ static int perf_mi_noop(void *arg)
u32 cycles[COUNT];
int i;
intel_engine_pm_get(engine);
base = create_empty_batch(ce);
if (IS_ERR(base)) {
err = PTR_ERR(base);
intel_engine_pm_put(engine);
break;
}
err = i915_vma_sync(base);
if (err) {
i915_vma_put(base);
intel_engine_pm_put(engine);
break;
}
......@@ -267,6 +276,7 @@ static int perf_mi_noop(void *arg)
if (IS_ERR(nop)) {
err = PTR_ERR(nop);
i915_vma_put(base);
intel_engine_pm_put(engine);
break;
}
......@@ -274,6 +284,7 @@ static int perf_mi_noop(void *arg)
if (err) {
i915_vma_put(nop);
i915_vma_put(base);
intel_engine_pm_put(engine);
break;
}
......@@ -327,6 +338,7 @@ static int perf_mi_noop(void *arg)
}
i915_vma_put(nop);
i915_vma_put(base);
intel_engine_pm_put(engine);
if (err)
break;
......
......@@ -348,7 +348,7 @@ release_queue(struct intel_engine_cs *engine,
struct i915_request *rq;
u32 *cs;
rq = i915_request_create(engine->kernel_context);
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
return PTR_ERR(rq);
......@@ -497,7 +497,7 @@ static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
struct i915_request *rq;
rq = i915_request_create(engine->kernel_context);
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
return rq;
......@@ -3698,7 +3698,7 @@ static int gpr_make_dirty(struct intel_engine_cs *engine)
u32 *cs;
int n;
rq = i915_request_create(engine->kernel_context);
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
return PTR_ERR(rq);
......
......@@ -261,7 +261,9 @@ static int live_mocs_kernel(void *arg)
return err;
for_each_engine(engine, gt, id) {
intel_engine_pm_get(engine);
err = check_mocs_engine(&mocs, engine->kernel_context);
intel_engine_pm_put(engine);
if (err)
break;
}
......
......@@ -458,7 +458,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
goto out;
}
rq = i915_request_create(engine->kernel_context);
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq))
goto out_unpin;
......@@ -675,9 +675,7 @@ static int live_hwsp_wrap(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
intel_engine_pm_get(engine);
rq = i915_request_create(engine->kernel_context);
intel_engine_pm_put(engine);
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out;
......
......@@ -1968,7 +1968,9 @@ static int emit_oa_config(struct i915_perf_stream *stream,
if (err)
goto err_vma_put;
intel_engine_pm_get(ce->engine);
rq = i915_request_create(ce);
intel_engine_pm_put(ce->engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_vma_unpin;
......@@ -2165,7 +2167,7 @@ static int gen8_modify_context(struct intel_context *ce,
lockdep_assert_held(&ce->pin_mutex);
rq = i915_request_create(ce->engine->kernel_context);
rq = intel_engine_create_kernel_request(ce->engine);
if (IS_ERR(rq))
return PTR_ERR(rq);
......
......@@ -99,7 +99,7 @@ __live_active_setup(struct drm_i915_private *i915)
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
rq = i915_request_create(engine->kernel_context);
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
......
......@@ -132,7 +132,7 @@ static int live_noa_delay(void *arg)
for (i = 0; i < 4; i++)
intel_write_status_page(stream->engine, 0x100 + i, 0);
rq = i915_request_create(stream->engine->kernel_context);
rq = intel_engine_create_kernel_request(stream->engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out;
......
......@@ -27,6 +27,7 @@
#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "i915_random.h"
......@@ -541,6 +542,7 @@ static int live_nop_request(void *arg)
if (err)
return err;
intel_engine_pm_get(engine);
for_each_prime_number_from(prime, 1, 8192) {
struct i915_request *request = NULL;
......@@ -579,6 +581,7 @@ static int live_nop_request(void *arg)
if (__igt_timeout(end_time, NULL))
break;
}
intel_engine_pm_put(engine);
err = igt_live_test_end(&t);
if (err)
......@@ -693,10 +696,13 @@ static int live_empty_request(void *arg)
if (err)
goto out_batch;
intel_engine_pm_get(engine);
/* Warmup / preload */
request = empty_request(engine, batch);
if (IS_ERR(request)) {
err = PTR_ERR(request);
intel_engine_pm_put(engine);
goto out_batch;
}
i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
......@@ -709,6 +715,7 @@ static int live_empty_request(void *arg)
request = empty_request(engine, batch);
if (IS_ERR(request)) {
err = PTR_ERR(request);
intel_engine_pm_put(engine);
goto out_batch;
}
}
......@@ -722,6 +729,7 @@ static int live_empty_request(void *arg)
break;
}
i915_request_put(request);
intel_engine_pm_put(engine);
err = igt_live_test_end(&t);
if (err)
......@@ -846,7 +854,7 @@ static int live_all_engines(void *arg)
idx = 0;
for_each_uabi_engine(engine, i915) {
request[idx] = i915_request_create(engine->kernel_context);
request[idx] = intel_engine_create_kernel_request(engine);
if (IS_ERR(request[idx])) {
err = PTR_ERR(request[idx]);
pr_err("%s: Request allocation failed with err=%d\n",
......@@ -963,7 +971,7 @@ static int live_sequential_engines(void *arg)
goto out_free;
}
request[idx] = i915_request_create(engine->kernel_context);
request[idx] = intel_engine_create_kernel_request(engine);
if (IS_ERR(request[idx])) {
err = PTR_ERR(request[idx]);
pr_err("%s: Request allocation failed for %s with err=%d\n",
......@@ -1068,15 +1076,19 @@ static int __live_parallel_engine1(void *arg)
struct intel_engine_cs *engine = arg;
IGT_TIMEOUT(end_time);
unsigned long count;
int err = 0;
count = 0;
intel_engine_pm_get(engine);
do {
struct i915_request *rq;
int err;
rq = i915_request_create(engine->kernel_context);
if (IS_ERR(rq))
return PTR_ERR(rq);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
if (err)
break;
}
i915_request_get(rq);
i915_request_add(rq);
......@@ -1086,13 +1098,14 @@ static int __live_parallel_engine1(void *arg)
err = -ETIME;
i915_request_put(rq);
if (err)
return err;
break;
count++;
} while (!__igt_timeout(end_time, NULL));
intel_engine_pm_put(engine);
pr_info("%s: %lu request + sync\n", engine->name, count);
return 0;
return err;
}
static int __live_parallel_engineN(void *arg)
......@@ -1100,21 +1113,26 @@ static int __live_parallel_engineN(void *arg)
struct intel_engine_cs *engine = arg;
IGT_TIMEOUT(end_time);
unsigned long count;
int err = 0;
count = 0;
intel_engine_pm_get(engine);
do {
struct i915_request *rq;
rq = i915_request_create(engine->kernel_context);
if (IS_ERR(rq))
return PTR_ERR(rq);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
}
i915_request_add(rq);
count++;
} while (!__igt_timeout(end_time, NULL));
intel_engine_pm_put(engine);
pr_info("%s: %lu requests\n", engine->name, count);
return 0;
return err;
}
static bool wake_all(struct drm_i915_private *i915)
......@@ -1158,9 +1176,11 @@ static int __live_parallel_spin(void *arg)
return -ENOMEM;
}
intel_engine_pm_get(engine);
rq = igt_spinner_create_request(&spin,
engine->kernel_context,
MI_NOOP); /* no preemption */
intel_engine_pm_put(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
if (err == -ENODEV)
......
......@@ -506,7 +506,9 @@ static int igt_lmem_write_cpu(void *arg)
}
/* Put the pages into a known state -- from the gpu for added fun */
intel_engine_pm_get(engine);
err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf);
intel_engine_pm_put(engine);
if (err)
goto out_unpin;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment