/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_engine_pm.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

#define BKL(ref) (&(ref)->i915->drm.struct_mutex)

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

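/*
 * Each active_node tracks the most recent request submitted on one
 * timeline (identified by its fence_context) on behalf of the parent
 * i915_active; the nodes are kept in an rbtree indexed by timeline.
 */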
struct active_node {
	struct i915_active_request base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

static inline struct active_node *
node_from_active(struct i915_active_request *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

static inline bool is_barrier(const struct i915_active_request *active)
{
	return IS_ERR(rcu_access_pointer(active->request));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.link;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.link);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

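/*
 * __active_retire() drops one reference with ref->mutex held by the
 * caller. If that was the last reference, the rbtree is detached,
 * ref->retire() is invoked and the nodes are returned to the slab
 * cache. The mutex is always released before returning.
 */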
static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	bool retire = false;

	lockdep_assert_held(&ref->mutex);

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (atomic_dec_and_test(&ref->count)) {
		debug_active_deactivate(ref);
		root = ref->tree;
		ref->tree = RB_ROOT;
		ref->cache = NULL;
		retire = true;
	}

	mutex_unlock(&ref->mutex);
	if (!retire)
		return;

	ref->retire(ref);

	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_request_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	/* One active may be flushed from inside the acquire of another */
	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
	__active_retire(ref);
}

static void
node_retire(struct i915_active_request *base, struct i915_request *rq)
{
	active_retire(node_from_active(base)->ref);
}

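/*
 * active_instance() returns the tracking slot for the given timeline,
 * allocating and inserting a new active_node if one does not already
 * exist. ref->cache remembers the last slot used so that the common
 * single-timeline case avoids the rbtree walk entirely.
 */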
static struct i915_active_request *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip an rbtree search
	 * for the common case; under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	mutex_lock(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	ref->cache = node;
	mutex_unlock(&ref->mutex);

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}

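/*
 * __i915_active_init() prepares an i915_active for tracking requests:
 * the optional @active callback is invoked when the tracker first
 * becomes busy, and @retire once all tracked requests have completed.
 *
 * Illustrative sketch only: "struct foo" and foo_retire() below are
 * hypothetical, and i915_active_init() is assumed to be the wrapper
 * macro in i915_active.h that supplies the lockdep class key.
 *
 *	static void foo_retire(struct i915_active *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, active);
 *
 *		... all requests using foo have now been retired ...
 *	}
 *
 *	i915_active_init(i915, &foo->active, NULL, foo_retire);
 *	err = i915_active_ref(&foo->active, rq->timeline, rq); (tl->mutex held)
 *	...
 *	err = i915_active_wait(&foo->active);
 */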
void __i915_active_init(struct drm_i915_private *i915,
			struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *key)
{
	debug_active_init(ref);

	ref->i915 = i915;
	ref->flags = 0;
	ref->active = active;
	ref->retire = retire;
	ref->tree = RB_ROOT;
	ref->cache = NULL;
	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", key);
}

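/*
 * ____active_del_barrier() tries to remove @node from the engine's
 * barrier_tasks llist by rebuilding the list without it, returning
 * true if we claimed the node and false if a concurrent
 * i915_request_add_active_barriers() consumed it first.
 */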
static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)
{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

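/*
 * i915_active_ref() records @rq as the most recent activity on @tl for
 * this tracker; the caller must hold tl->mutex. If the slot for this
 * timeline currently holds a preallocated idle-barrier proto-node, the
 * barrier is cancelled and the node is reused for the new request.
 */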
int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct i915_request *rq)
{
	struct i915_active_request *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->request, NULL);
		INIT_LIST_HEAD(&active->link);
	} else {
		if (!i915_active_request_isset(active))
			atomic_inc(&ref->count);
	}
	GEM_BUG_ON(!atomic_read(&ref->count));
	__i915_active_request_set(active, rq);

out:
	i915_active_release(ref);
	return err;
}

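/*
 * i915_active_acquire() takes a temporary reference so that the tracker
 * cannot retire while the caller is adding requests to it. On the
 * idle -> busy transition the optional ref->active() callback is
 * invoked. Each successful acquire must be balanced by a call to
 * i915_active_release().
 */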
int i915_active_acquire(struct i915_active *ref)
{
	int err;

	debug_active_assert(ref);
	if (atomic_add_unless(&ref->count, 1, 0))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (!atomic_read(&ref->count) && ref->active)
		err = ref->active(ref);
	if (!err) {
		debug_active_activate(ref);
		atomic_inc(&ref->count);
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

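/*
 * i915_active_trygrab() takes an extra reference on a tracker that is
 * already active, marking it with I915_ACTIVE_GRAB_BIT so that only one
 * grabber holds it at a time; i915_active_ungrab() drops that reference
 * and wakes anyone in i915_active_wait() waiting on the grab bit.
 */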
static void __active_ungrab(struct i915_active *ref)
{
	clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags);
}

bool i915_active_trygrab(struct i915_active *ref)
{
	debug_active_assert(ref);

	if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags))
		return false;

	if (!atomic_add_unless(&ref->count, 1, 0)) {
		__active_ungrab(ref);
		return false;
	}

	return true;
}

void i915_active_ungrab(struct i915_active *ref)
{
	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));

	active_retire(ref);
	__active_ungrab(ref);
}

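/*
 * i915_active_wait() flushes all tracked requests and waits for the
 * tracker to idle. It returns -EBUSY if an unconnected idle-barrier is
 * still outstanding, and -EINTR if interrupted while waiting.
 */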
int i915_active_wait(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err;

	might_sleep();
	might_lock(&ref->mutex);

	if (i915_active_is_idle(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (!atomic_add_unless(&ref->count, 1, 0)) {
		mutex_unlock(&ref->mutex);
		return 0;
	}

	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		if (is_barrier(&it->base)) { /* unconnected idle-barrier */
			err = -EBUSY;
			break;
		}

		err = i915_active_request_retire(&it->base, BKL(ref));
		if (err)
			break;
	}

	__active_retire(ref);
	if (err)
		return err;

	if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE))
		return -EINTR;

	if (!i915_active_is_idle(ref))
		return -EBUSY;

	return 0;
}

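/*
 * i915_request_await_active_request() and i915_request_await_active()
 * order @rq after the tracked activity: @rq will not be executed until
 * the request(s) currently recorded in the tracker have completed.
 */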
int i915_request_await_active_request(struct i915_request *rq,
				      struct i915_active_request *active)
{
	struct i915_request *barrier =
		i915_active_request_raw(active, &rq->i915->drm.struct_mutex);

	return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}

int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
	struct active_node *it, *n;
	int err;

	if (RB_EMPTY_ROOT(&ref->tree))
		return 0;

	/* await allocates and so we need to avoid hitting the shrinker */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	mutex_lock(&ref->mutex);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		err = i915_request_await_active_request(rq, &it->base);
		if (err)
			break;
	}
	mutex_unlock(&ref->mutex);

	i915_active_release(ref);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	GEM_BUG_ON(atomic_read(&ref->count));
	mutex_destroy(&ref->mutex);
}
#endif

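/*
 * Idle barriers
 *
 * A barrier is a preallocated proto-node that holds a reference on the
 * i915_active until a later request on the engine's kernel_context
 * timeline is retired. i915_active_acquire_preallocate_barrier() sets
 * one aside per physical engine, i915_active_acquire_barrier() moves
 * them into the rbtree as proto-nodes, and
 * i915_request_add_active_barriers() finally couples them to the
 * kernel_context request that will release them.
 */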
static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_request_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	mutex_lock(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active, due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	mutex_unlock(&ref->mutex);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	mutex_unlock(&ref->mutex);

	return rb_entry(p, struct active_node, node);
}

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	intel_engine_mask_t tmp, mask = engine->mask;
	struct llist_node *pos, *next;
	int err;

	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	for_each_engine_masked(engine, i915, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
			node->base.lock =
				&engine->kernel_context->timeline->mutex;
#endif
			RCU_INIT_POINTER(node->base.request, NULL);
			node->base.retire = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_request_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
			node->base.link.prev = (void *)engine;
			atomic_inc(&ref->count);
		}

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
		intel_engine_pm_get(engine);
	}

	return 0;

unwind:
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);

		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
	mutex_unlock(&ref->mutex);
}

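/*
 * i915_request_add_active_barriers() attaches the engine's pending
 * barrier tasks to @rq, which must be on the engine's kernel_context
 * timeline; each barrier's reference on its i915_active is then dropped
 * when @rq is retired.
 */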
void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;

	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline);

	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
		RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq);
		smp_wmb(); /* serialise with reuse_idle_barrier */
		list_add_tail((struct list_head *)node, &rq->active_list);
	}
}

int i915_active_request_set(struct i915_active_request *active,
			    struct i915_request *rq)
{
	int err;

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
	lockdep_assert_held(active->lock);
#endif

	/* Must maintain ordering wrt previous active requests */
	err = i915_request_await_active_request(rq, active);
	if (err)
		return err;

	__i915_active_request_set(active, rq);
	return 0;
}

void i915_active_retire_noop(struct i915_active_request *active,
			     struct i915_request *request)
{
	/* Space left intentionally blank */
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}