Commit d0cf9b56 authored by Alistair Popple, committed by Michael Ellerman

powerpc/powernv/npu: Do a PID GPU TLB flush when invalidating a large address range

The NPU has a limited number of address translation shootdown (ATSD)
registers and the GPU has limited bandwidth to process ATSDs. This can
result in contention of ATSD registers leading to soft lockups on some
threads, particularly when invalidating a large address range in
pnv_npu2_mn_invalidate_range().

At some threshold it becomes more efficient to flush the entire GPU
TLB for the given MM context (PID) than individually flushing each
address in the range. This patch will result in ranges greater than
2MB being converted from 32+ ATSDs into a single ATSD which will flush
the TLB for the given PID on each GPU.

Fixes: 1ab66d1f ("powerpc/powernv: Introduce address translation services for Nvlink2")
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Alistair Popple <alistair@popple.id.au>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Tested-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent a1409ada
...@@ -39,6 +39,13 @@ ...@@ -39,6 +39,13 @@
*/ */
static DEFINE_SPINLOCK(npu_context_lock); static DEFINE_SPINLOCK(npu_context_lock);
/*
 * Size threshold, in bytes (2 MiB), for an address shootdown range.
 *
 * When an address shootdown range (end - start) is strictly greater than
 * this threshold we invalidate the entire TLB on the GPU for the given PID
 * with a single request, rather than invalidating each specific address in
 * the range one PAGE_SIZE step at a time.
 */
#define ATSD_THRESHOLD (2*1024*1024)
/* /*
* Other types of TCE cache invalidation are not functional in the * Other types of TCE cache invalidation are not functional in the
* hardware. * hardware.
...@@ -677,11 +684,19 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, ...@@ -677,11 +684,19 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct npu_context *npu_context = mn_to_npu_context(mn); struct npu_context *npu_context = mn_to_npu_context(mn);
unsigned long address; unsigned long address;
if (end - start > ATSD_THRESHOLD) {
/*
* Just invalidate the entire PID if the address range is too
* large.
*/
mmio_invalidate(npu_context, 0, 0, true);
} else {
for (address = start; address < end; address += PAGE_SIZE) for (address = start; address < end; address += PAGE_SIZE)
mmio_invalidate(npu_context, 1, address, false); mmio_invalidate(npu_context, 1, address, false);
/* Do the flush only on the final address == end */ /* Do the flush only on the final address == end */
mmio_invalidate(npu_context, 1, address, true); mmio_invalidate(npu_context, 1, address, true);
}
} }
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment