Commit 1570175a authored by Dan Williams's avatar Dan Williams Committed by Linus Torvalds

PCI/P2PDMA: track pgmap references per resource, not globally

In preparation for fixing a race between devm_memremap_pages_release()
and the final put of a page from the device-page-map, allocate a
percpu-ref per p2pdma resource mapping.

Link: http://lkml.kernel.org/r/155727338646.292046.9922678317501435597.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: default avatarIra Weiny <ira.weiny@intel.com>
Reviewed-by: default avatarLogan Gunthorpe <logang@deltatee.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 795ee306
...@@ -20,12 +20,16 @@ ...@@ -20,12 +20,16 @@
#include <linux/seq_buf.h> #include <linux/seq_buf.h>
/*
 * Per-device P2PDMA state.
 *
 * The percpu_ref/completion pair that used to live here moved into
 * struct p2pdma_pagemap so that page references are tracked per
 * resource mapping rather than globally per device.
 */
struct pci_p2pdma {
	struct gen_pool *pool;		/* pool of exported P2P memory */
	bool p2pmem_published;		/* published via sysfs? */
};
/*
 * Per-resource state for one P2PDMA BAR mapping: embeds the dev_pagemap
 * plus a dedicated percpu_ref so each resource's page references can be
 * tracked and torn down independently of the others.
 */
struct p2pdma_pagemap {
struct dev_pagemap pgmap;
struct percpu_ref ref;	/* one reference per outstanding allocation */
struct completion ref_done;	/* completed by pci_p2pdma_percpu_release() */
};
static ssize_t size_show(struct device *dev, struct device_attribute *attr, static ssize_t size_show(struct device *dev, struct device_attribute *attr,
char *buf) char *buf)
{ {
...@@ -74,41 +78,45 @@ static const struct attribute_group p2pmem_group = { ...@@ -74,41 +78,45 @@ static const struct attribute_group p2pmem_group = {
.name = "p2pmem", .name = "p2pmem",
}; };
/* Map a percpu_ref back to the p2pdma_pagemap that embeds it. */
static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref)
{
	struct p2pdma_pagemap *p2p_pgmap;

	p2p_pgmap = container_of(ref, struct p2pdma_pagemap, ref);
	return p2p_pgmap;
}
static void pci_p2pdma_percpu_release(struct percpu_ref *ref) static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
{ {
struct pci_p2pdma *p2p = struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);
container_of(ref, struct pci_p2pdma, devmap_ref);
complete_all(&p2p->devmap_ref_done); complete(&p2p_pgmap->ref_done);
} }
/*
 * Kill the per-resource reference so no new allocations can take it.
 * Each p2pdma_pagemap owns its own ref now, so this runs exactly once
 * per resource and no longer needs the old percpu_ref_is_dying() guard.
 */
static void pci_p2pdma_percpu_kill(struct percpu_ref *ref)
{
	percpu_ref_kill(ref);
}
/*
 * devm action: wait until the last page reference has been dropped
 * (signalled via ref_done), then release the percpu_ref's resources.
 */
static void pci_p2pdma_percpu_cleanup(void *ref)
{
	struct p2pdma_pagemap *pgmap = to_p2p_pgmap(ref);

	wait_for_completion(&pgmap->ref_done);
	percpu_ref_exit(&pgmap->ref);
}
static void pci_p2pdma_release(void *data) static void pci_p2pdma_release(void *data)
{ {
struct pci_dev *pdev = data; struct pci_dev *pdev = data;
struct pci_p2pdma *p2pdma = pdev->p2pdma;
if (!pdev->p2pdma) if (!p2pdma)
return; return;
wait_for_completion(&pdev->p2pdma->devmap_ref_done); /* Flush and disable pci_alloc_p2p_mem() */
percpu_ref_exit(&pdev->p2pdma->devmap_ref); pdev->p2pdma = NULL;
synchronize_rcu();
gen_pool_destroy(pdev->p2pdma->pool); gen_pool_destroy(p2pdma->pool);
sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
pdev->p2pdma = NULL;
} }
static int pci_p2pdma_setup(struct pci_dev *pdev) static int pci_p2pdma_setup(struct pci_dev *pdev)
...@@ -124,12 +132,6 @@ static int pci_p2pdma_setup(struct pci_dev *pdev) ...@@ -124,12 +132,6 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
if (!p2p->pool) if (!p2p->pool)
goto out; goto out;
init_completion(&p2p->devmap_ref_done);
error = percpu_ref_init(&p2p->devmap_ref,
pci_p2pdma_percpu_release, 0, GFP_KERNEL);
if (error)
goto out_pool_destroy;
error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
if (error) if (error)
goto out_pool_destroy; goto out_pool_destroy;
...@@ -163,6 +165,7 @@ static int pci_p2pdma_setup(struct pci_dev *pdev) ...@@ -163,6 +165,7 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
u64 offset) u64 offset)
{ {
struct p2pdma_pagemap *p2p_pgmap;
struct dev_pagemap *pgmap; struct dev_pagemap *pgmap;
void *addr; void *addr;
int error; int error;
...@@ -185,14 +188,32 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, ...@@ -185,14 +188,32 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
return error; return error;
} }
pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL); p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
if (!pgmap) if (!p2p_pgmap)
return -ENOMEM; return -ENOMEM;
init_completion(&p2p_pgmap->ref_done);
error = percpu_ref_init(&p2p_pgmap->ref,
pci_p2pdma_percpu_release, 0, GFP_KERNEL);
if (error)
goto pgmap_free;
/*
* FIXME: the percpu_ref_exit needs to be coordinated internal
* to devm_memremap_pages_release(). Duplicate the same ordering
* as other devm_memremap_pages() users for now.
*/
error = devm_add_action(&pdev->dev, pci_p2pdma_percpu_cleanup,
&p2p_pgmap->ref);
if (error)
goto ref_cleanup;
pgmap = &p2p_pgmap->pgmap;
pgmap->res.start = pci_resource_start(pdev, bar) + offset; pgmap->res.start = pci_resource_start(pdev, bar) + offset;
pgmap->res.end = pgmap->res.start + size - 1; pgmap->res.end = pgmap->res.start + size - 1;
pgmap->res.flags = pci_resource_flags(pdev, bar); pgmap->res.flags = pci_resource_flags(pdev, bar);
pgmap->ref = &pdev->p2pdma->devmap_ref; pgmap->ref = &p2p_pgmap->ref;
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) - pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
pci_resource_start(pdev, bar); pci_resource_start(pdev, bar);
...@@ -201,12 +222,13 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, ...@@ -201,12 +222,13 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
addr = devm_memremap_pages(&pdev->dev, pgmap); addr = devm_memremap_pages(&pdev->dev, pgmap);
if (IS_ERR(addr)) { if (IS_ERR(addr)) {
error = PTR_ERR(addr); error = PTR_ERR(addr);
goto pgmap_free; goto ref_exit;
} }
error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr, error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
pci_bus_address(pdev, bar) + offset, pci_bus_address(pdev, bar) + offset,
resource_size(&pgmap->res), dev_to_node(&pdev->dev)); resource_size(&pgmap->res), dev_to_node(&pdev->dev),
&p2p_pgmap->ref);
if (error) if (error)
goto pages_free; goto pages_free;
...@@ -217,8 +239,10 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, ...@@ -217,8 +239,10 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
pages_free: pages_free:
devm_memunmap_pages(&pdev->dev, pgmap); devm_memunmap_pages(&pdev->dev, pgmap);
ref_cleanup:
percpu_ref_exit(&p2p_pgmap->ref);
pgmap_free: pgmap_free:
devm_kfree(&pdev->dev, pgmap); devm_kfree(&pdev->dev, p2p_pgmap);
return error; return error;
} }
EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource); EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
...@@ -587,19 +611,30 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_find_many); ...@@ -587,19 +611,30 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
*/ */
/*
 * Allocate @size bytes of peer-to-peer DMA memory from @pdev's pool.
 *
 * The allocation is accounted against the owning resource's percpu_ref
 * (stashed as the gen_pool chunk owner) so teardown can wait for it.
 * Returns a kernel virtual address, or NULL if no memory is available
 * or the device's p2pdma support is being torn down.
 */
void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
{
	void *ret = NULL;
	struct percpu_ref *ref;

	/*
	 * Pairs with synchronize_rcu() in pci_p2pdma_release() to
	 * ensure pdev->p2pdma is non-NULL for the duration of the
	 * read-lock.
	 */
	rcu_read_lock();
	if (unlikely(!pdev->p2pdma))
		goto out;

	ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size,
			(void **) &ref);
	if (!ret)
		goto out;

	if (unlikely(!percpu_ref_tryget_live(ref))) {
		/* The owning pgmap is going away; undo the allocation. */
		gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size);
		ret = NULL;
		goto out;
	}
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
...@@ -612,8 +647,11 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); ...@@ -612,8 +647,11 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
*/ */
/*
 * Return memory obtained from pci_alloc_p2pmem() to the pool and drop
 * the per-resource reference taken at allocation time.  The owning
 * percpu_ref is recovered from the gen_pool chunk's owner pointer.
 */
void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
{
	struct percpu_ref *ref;

	gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size,
			(void **) &ref);
	percpu_ref_put(ref);
}
EXPORT_SYMBOL_GPL(pci_free_p2pmem);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment