Commit 720c8404 authored by Mark Hairgrove, committed by Michael Ellerman

powerpc/npu-dma.c: Fix crash after __mmu_notifier_register failure

pnv_npu2_init_context wasn't checking the return code from
__mmu_notifier_register. If __mmu_notifier_register failed, the
npu_context was still assigned to the mm and the caller wasn't given any
indication that things went wrong. Later on pnv_npu2_destroy_context would
be called, which in turn called mmu_notifier_unregister and dropped
mm->mm_count without having incremented it in the first place. This led to
various forms of corruption like mm use-after-free and mm double-free.

__mmu_notifier_register can fail with EINTR if a signal is pending, so
this case can be frequent.

This patch calls opal_npu_destroy_context on the failure paths, and makes
sure not to assign mm->context.npu_context until past the failure points.
Signed-off-by: Mark Hairgrove <mhairgrove@nvidia.com>
Acked-By: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent c2be663d
...@@ -724,6 +724,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, ...@@ -724,6 +724,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/* No nvlink associated with this GPU device */ /* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV); return ERR_PTR(-ENODEV);
nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
&nvlink_index)))
return ERR_PTR(-ENODEV);
if (!mm || mm->context.id == 0) { if (!mm || mm->context.id == 0) {
/* /*
* Kernel thread contexts are not supported and context id 0 is * Kernel thread contexts are not supported and context id 0 is
...@@ -751,25 +756,30 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, ...@@ -751,25 +756,30 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
*/ */
npu_context = mm->context.npu_context; npu_context = mm->context.npu_context;
if (!npu_context) { if (!npu_context) {
rc = -ENOMEM;
npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
if (!npu_context) if (npu_context) {
return ERR_PTR(-ENOMEM); kref_init(&npu_context->kref);
mm->context.npu_context = npu_context;
npu_context->mm = mm; npu_context->mm = mm;
npu_context->mn.ops = &nv_nmmu_notifier_ops; npu_context->mn.ops = &nv_nmmu_notifier_ops;
__mmu_notifier_register(&npu_context->mn, mm); rc = __mmu_notifier_register(&npu_context->mn, mm);
kref_init(&npu_context->kref); }
if (rc) {
kfree(npu_context);
opal_npu_destroy_context(nphb->opal_id, mm->context.id,
PCI_DEVID(gpdev->bus->number,
gpdev->devfn));
return ERR_PTR(rc);
}
mm->context.npu_context = npu_context;
} else { } else {
kref_get(&npu_context->kref); WARN_ON(!kref_get_unless_zero(&npu_context->kref));
} }
npu_context->release_cb = cb; npu_context->release_cb = cb;
npu_context->priv = priv; npu_context->priv = priv;
nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
&nvlink_index)))
return ERR_PTR(-ENODEV);
/* /*
* npdev is a pci_dev pointer setup by the PCI code. We assign it to * npdev is a pci_dev pointer setup by the PCI code. We assign it to
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment