Commit 0cafc62e authored by Ralph Campbell, committed by Jason Gunthorpe

nouveau/hmm: fault one page at a time

The SVM page fault handler groups faults into a range of contiguous
virtual addresses and calls hmm_range_fault() to populate and return
the page frame numbers of the system memory mapped by the CPU.  In preparation
for supporting large pages to be mapped by the GPU, process faults one
page at a time. In addition, use the hmm_range default_flags to fix a
corner case where the input hmm_pfns array is not reinitialized after
hmm_range_fault() returns -EBUSY and must be called again.

Link: https://lore.kernel.org/r/20200701225352.9649-2-rcampbell@nvidia.com
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent e478425b
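
For reference, the single-page retry pattern this patch moves to can be sketched in isolation as follows. This is only an illustrative sketch, not code from the commit: the function fault_one_page() and its parameters are invented for the example. It shows the corner case called out above: because the requested access lives in range.default_flags rather than in pre-seeded hmm_pfns[] entries filtered through pfn_flags_mask, a retry after hmm_range_fault() returns -EBUSY needs no re-initialization of the pfn array.

/*
 * Illustrative sketch only -- not part of this commit.  Fault a single
 * page at @addr through hmm_range_fault(), requesting access through
 * default_flags so nothing in hmm_pfns[] has to be regenerated when
 * -EBUSY forces a retry.
 */
#include <linux/hmm.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>

static int fault_one_page(struct mmu_interval_notifier *notifier,
			  unsigned long addr, unsigned long hmm_flags)
{
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long hmm_pfns[1];
	struct hmm_range range = {
		.notifier = notifier,
		.start = addr,
		.end = addr + PAGE_SIZE,
		.hmm_pfns = hmm_pfns,
		/* e.g. HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE for a write fault */
		.default_flags = hmm_flags,
	};
	struct mm_struct *mm = notifier->mm;
	int ret;

	while (true) {
		if (time_after(jiffies, timeout))
			return -EBUSY;

		range.notifier_seq = mmu_interval_read_begin(range.notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(&range);
		mmap_read_unlock(mm);
		if (ret) {
			/* default_flags is untouched, so a bare retry is enough. */
			if (ret == -EBUSY)
				continue;
			return ret;
		}

		/*
		 * hmm_pfns[0] now describes the CPU page backing @addr.  A
		 * real driver would take its own lock and check
		 * mmu_interval_read_retry() before programming the GPU.
		 */
		return 0;
	}
}

In the patch itself, hmm_flags is derived from the GPU fault's access type (READ, PREFETCH, or write/atomic), as the switch statement added to nouveau_svm_fault() below shows.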
@@ -516,7 +516,7 @@ static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
 static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
				    struct hmm_range *range, u64 *ioctl_addr)
 {
-	unsigned long i, npages;
+	struct page *page;
 
 	/*
	 * The ioctl_addr prepared here is passed through nvif_object_ioctl()
@@ -525,42 +525,38 @@ static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
	 * This is all just encoding the internal hmm representation into a
	 * different nouveau internal representation.
	 */
-	npages = (range->end - range->start) >> PAGE_SHIFT;
-	for (i = 0; i < npages; ++i) {
-		struct page *page;
-
-		if (!(range->hmm_pfns[i] & HMM_PFN_VALID)) {
-			ioctl_addr[i] = 0;
-			continue;
-		}
-
-		page = hmm_pfn_to_page(range->hmm_pfns[i]);
-		if (is_device_private_page(page))
-			ioctl_addr[i] = nouveau_dmem_page_addr(page) |
-					NVIF_VMM_PFNMAP_V0_V |
-					NVIF_VMM_PFNMAP_V0_VRAM;
-		else
-			ioctl_addr[i] = page_to_phys(page) |
-					NVIF_VMM_PFNMAP_V0_V |
-					NVIF_VMM_PFNMAP_V0_HOST;
-		if (range->hmm_pfns[i] & HMM_PFN_WRITE)
-			ioctl_addr[i] |= NVIF_VMM_PFNMAP_V0_W;
+	if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) {
+		ioctl_addr[0] = 0;
+		return;
 	}
+
+	page = hmm_pfn_to_page(range->hmm_pfns[0]);
+	if (is_device_private_page(page))
+		ioctl_addr[0] = nouveau_dmem_page_addr(page) |
+				NVIF_VMM_PFNMAP_V0_V |
+				NVIF_VMM_PFNMAP_V0_VRAM;
+	else
+		ioctl_addr[0] = page_to_phys(page) |
+				NVIF_VMM_PFNMAP_V0_V |
+				NVIF_VMM_PFNMAP_V0_HOST;
+	if (range->hmm_pfns[0] & HMM_PFN_WRITE)
+		ioctl_addr[0] |= NVIF_VMM_PFNMAP_V0_W;
 }
 
 static int nouveau_range_fault(struct nouveau_svmm *svmm,
			       struct nouveau_drm *drm, void *data, u32 size,
-			       unsigned long hmm_pfns[], u64 *ioctl_addr,
+			       u64 *ioctl_addr, unsigned long hmm_flags,
			       struct svm_notifier *notifier)
 {
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	/* Have HMM fault pages within the fault window to the GPU. */
+	unsigned long hmm_pfns[1];
	struct hmm_range range = {
		.notifier = &notifier->notifier,
		.start = notifier->notifier.interval_tree.start,
		.end = notifier->notifier.interval_tree.last + 1,
-		.pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
+		.default_flags = hmm_flags,
		.hmm_pfns = hmm_pfns,
	};
	struct mm_struct *mm = notifier->notifier.mm;
@@ -575,11 +571,6 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
		ret = hmm_range_fault(&range);
		mmap_read_unlock(mm);
		if (ret) {
-			/*
-			 * FIXME: the input PFN_REQ flags are destroyed on
-			 * -EBUSY, we need to regenerate them, also for the
-			 * other continue below
-			 */
			if (ret == -EBUSY)
				continue;
			return ret;
@@ -614,17 +605,12 @@ nouveau_svm_fault(struct nvif_notify *notify)
	struct nvif_object *device = &svm->drm->client.device.object;
	struct nouveau_svmm *svmm;
	struct {
-		struct {
-			struct nvif_ioctl_v0 i;
-			struct nvif_ioctl_mthd_v0 m;
-			struct nvif_vmm_pfnmap_v0 p;
-		} i;
-		u64 phys[16];
+		struct nouveau_pfnmap_args i;
+		u64 phys[1];
	} args;
-	unsigned long hmm_pfns[ARRAY_SIZE(args.phys)];
-	struct vm_area_struct *vma;
+	unsigned long hmm_flags;
	u64 inst, start, limit;
-	int fi, fn, pi, fill;
+	int fi, fn;
	int replay = 0, ret;
 
	/* Parse available fault buffer entries into a cache, and update
@@ -691,66 +677,53 @@ nouveau_svm_fault(struct nvif_notify *notify)
		 * window into a single update.
		 */
		start = buffer->fault[fi]->addr;
-		limit = start + (ARRAY_SIZE(args.phys) << PAGE_SHIFT);
+		limit = start + PAGE_SIZE;
		if (start < svmm->unmanaged.limit)
			limit = min_t(u64, limit, svmm->unmanaged.start);
-		SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
 
-		mm = svmm->notifier.mm;
-		if (!mmget_not_zero(mm)) {
-			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
-			continue;
-		}
-
-		/* Intersect fault window with the CPU VMA, cancelling
-		 * the fault if the address is invalid.
+		/*
+		 * Prepare the GPU-side update of all pages within the
+		 * fault window, determining required pages and access
+		 * permissions based on pending faults.
		 */
-		mmap_read_lock(mm);
-		vma = find_vma_intersection(mm, start, limit);
-		if (!vma) {
-			SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit);
-			mmap_read_unlock(mm);
-			mmput(mm);
-			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
-			continue;
+		args.i.p.addr = start;
+		args.i.p.page = PAGE_SHIFT;
+		args.i.p.size = PAGE_SIZE;
+		/*
+		 * Determine required permissions based on GPU fault
+		 * access flags.
+		 * XXX: atomic?
+		 */
+		switch (buffer->fault[fi]->access) {
+		case 0: /* READ. */
+			hmm_flags = HMM_PFN_REQ_FAULT;
+			break;
+		case 3: /* PREFETCH. */
+			hmm_flags = 0;
+			break;
+		default:
+			hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
+			break;
		}
-		start = max_t(u64, start, vma->vm_start);
-		limit = min_t(u64, limit, vma->vm_end);
-		mmap_read_unlock(mm);
-		SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
 
-		if (buffer->fault[fi]->addr != start) {
-			SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr);
-			mmput(mm);
+		mm = svmm->notifier.mm;
+		if (!mmget_not_zero(mm)) {
			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
			continue;
		}
 
-		/* Prepare the GPU-side update of all pages within the
-		 * fault window, determining required pages and access
-		 * permissions based on pending faults.
-		 */
-		args.i.p.page = PAGE_SHIFT;
-		args.i.p.addr = start;
-		for (fn = fi, pi = 0;;) {
-			/* Determine required permissions based on GPU fault
-			 * access flags.
-			 *XXX: atomic?
-			 */
-			switch (buffer->fault[fn]->access) {
-			case 0: /* READ. */
-				hmm_pfns[pi++] = HMM_PFN_REQ_FAULT;
-				break;
-			case 3: /* PREFETCH. */
-				hmm_pfns[pi++] = 0;
-				break;
-			default:
-				hmm_pfns[pi++] = HMM_PFN_REQ_FAULT |
-						 HMM_PFN_REQ_WRITE;
-				break;
-			}
-			args.i.p.size = pi << PAGE_SHIFT;
+		notifier.svmm = svmm;
+		ret = mmu_interval_notifier_insert(&notifier.notifier, mm,
						   args.i.p.addr, args.i.p.size,
						   &nouveau_svm_mni_ops);
+		if (!ret) {
+			ret = nouveau_range_fault(svmm, svm->drm, &args,
				sizeof(args), args.phys, hmm_flags, &notifier);
+			mmu_interval_notifier_remove(&notifier.notifier);
+		}
+		mmput(mm);
 
+		for (fn = fi; ++fn < buffer->fault_nr; ) {
			/* It's okay to skip over duplicate addresses from the
			 * same SVMM as faults are ordered by access type such
			 * that only the first one needs to be handled.
@@ -758,61 +731,21 @@ nouveau_svm_fault(struct nvif_notify *notify)
			 * ie. WRITE faults appear first, thus any handling of
			 * pending READ faults will already be satisfied.
			 */
-			while (++fn < buffer->fault_nr &&
-			       buffer->fault[fn]->svmm == svmm &&
-			       buffer->fault[fn    ]->addr ==
-			       buffer->fault[fn - 1]->addr);
-
-			/* If the next fault is outside the window, or all GPU
-			 * faults have been dealt with, we're done here.
-			 */
-			if (fn >= buffer->fault_nr ||
-			    buffer->fault[fn]->svmm != svmm ||
+			if (buffer->fault[fn]->svmm != svmm ||
			    buffer->fault[fn]->addr >= limit)
				break;
-
-			/* Fill in the gap between this fault and the next. */
-			fill = (buffer->fault[fn    ]->addr -
-				buffer->fault[fn - 1]->addr) >> PAGE_SHIFT;
-			while (--fill)
-				hmm_pfns[pi++] = 0;
		}
 
-		SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)",
-			 args.i.p.addr,
-			 args.i.p.addr + args.i.p.size, fn - fi);
-
-		notifier.svmm = svmm;
-		ret = mmu_interval_notifier_insert(&notifier.notifier,
-						   svmm->notifier.mm,
-						   args.i.p.addr, args.i.p.size,
-						   &nouveau_svm_mni_ops);
-		if (!ret) {
-			ret = nouveau_range_fault(
-				svmm, svm->drm, &args,
-				sizeof(args.i) + pi * sizeof(args.phys[0]),
-				hmm_pfns, args.phys, &notifier);
-			mmu_interval_notifier_remove(&notifier.notifier);
-		}
-		mmput(mm);
-
-		/* Cancel any faults in the window whose pages didn't manage
-		 * to keep their valid bit, or stay writeable when required.
-		 *
-		 * If handling failed completely, cancel all faults.
-		 */
-		while (fi < fn) {
-			struct nouveau_svm_fault *fault = buffer->fault[fi++];
-
-			pi = (fault->addr - args.i.p.addr) >> PAGE_SHIFT;
-			if (ret ||
-			    !(args.phys[pi] & NVIF_VMM_PFNMAP_V0_V) ||
-			    (!(args.phys[pi] & NVIF_VMM_PFNMAP_V0_W) &&
-			     fault->access != 0 && fault->access != 3)) {
+		/* If handling failed completely, cancel all faults. */
+		if (ret) {
+			while (fi < fn) {
+				struct nouveau_svm_fault *fault =
					buffer->fault[fi++];
+
				nouveau_svm_fault_cancel_fault(svm, fault);
-				continue;
			}
+		} else
			replay++;
-		}
	}
 
	/* Issue fault replay to the GPU. */