Commit 1d7f940c, authored by Ralph Campbell, committed by Ben Skeggs

drm/nouveau/nouveau/hmm: fix nouveau_dmem_chunk allocations

In nouveau_dmem_init(), a number of struct nouveau_dmem_chunk are allocated
and put on the dmem->chunk_empty list. Then in nouveau_dmem_pages_alloc(),
a nouveau_dmem_chunk is taken from that list and GPU memory is allocated
for it. However, the chunk is then put straight back on the chunk_empty
list and is never moved to the chunk_free or chunk_full lists. This results
in only one chunk (2MB) ever actually being used, which quickly leads to
failures when migrating to device private memory.

Fix this by having just one list of free device private pages and if no
pages are free, allocate a chunk of device private pages and GPU memory.
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent d6a9efec
...@@ -61,10 +61,8 @@ struct nouveau_dmem_chunk { ...@@ -61,10 +61,8 @@ struct nouveau_dmem_chunk {
struct list_head list; struct list_head list;
struct nouveau_bo *bo; struct nouveau_bo *bo;
struct nouveau_drm *drm; struct nouveau_drm *drm;
unsigned long pfn_first;
unsigned long callocated; unsigned long callocated;
unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)]; struct dev_pagemap pagemap;
spinlock_t lock;
}; };
struct nouveau_dmem_migrate { struct nouveau_dmem_migrate {
...@@ -74,48 +72,50 @@ struct nouveau_dmem_migrate { ...@@ -74,48 +72,50 @@ struct nouveau_dmem_migrate {
struct nouveau_dmem { struct nouveau_dmem {
struct nouveau_drm *drm; struct nouveau_drm *drm;
struct dev_pagemap pagemap;
struct nouveau_dmem_migrate migrate; struct nouveau_dmem_migrate migrate;
struct list_head chunk_free; struct list_head chunks;
struct list_head chunk_full;
struct list_head chunk_empty;
struct mutex mutex; struct mutex mutex;
struct page *free_pages;
spinlock_t lock;
}; };
static inline struct nouveau_dmem *page_to_dmem(struct page *page) static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
{
return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap);
}
static struct nouveau_drm *page_to_drm(struct page *page)
{ {
return container_of(page->pgmap, struct nouveau_dmem, pagemap); struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
return chunk->drm;
} }
static unsigned long nouveau_dmem_page_addr(struct page *page) static unsigned long nouveau_dmem_page_addr(struct page *page)
{ {
struct nouveau_dmem_chunk *chunk = page->zone_device_data; struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
unsigned long idx = page_to_pfn(page) - chunk->pfn_first; unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) -
chunk->pagemap.res.start;
return (idx << PAGE_SHIFT) + chunk->bo->bo.offset; return chunk->bo->bo.offset + off;
} }
static void nouveau_dmem_page_free(struct page *page) static void nouveau_dmem_page_free(struct page *page)
{ {
struct nouveau_dmem_chunk *chunk = page->zone_device_data; struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
unsigned long idx = page_to_pfn(page) - chunk->pfn_first; struct nouveau_dmem *dmem = chunk->drm->dmem;
spin_lock(&dmem->lock);
page->zone_device_data = dmem->free_pages;
dmem->free_pages = page;
/*
* FIXME:
*
* This is really a bad example, we need to overhaul nouveau memory
* management to be more page focus and allow lighter locking scheme
* to be use in the process.
*/
spin_lock(&chunk->lock);
clear_bit(idx, chunk->bitmap);
WARN_ON(!chunk->callocated); WARN_ON(!chunk->callocated);
chunk->callocated--; chunk->callocated--;
/* /*
* FIXME when chunk->callocated reach 0 we should add the chunk to * FIXME when chunk->callocated reach 0 we should add the chunk to
* a reclaim list so that it can be freed in case of memory pressure. * a reclaim list so that it can be freed in case of memory pressure.
*/ */
spin_unlock(&chunk->lock); spin_unlock(&dmem->lock);
} }
static void nouveau_dmem_fence_done(struct nouveau_fence **fence) static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
...@@ -167,8 +167,8 @@ static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm, ...@@ -167,8 +167,8 @@ static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
{ {
struct nouveau_dmem *dmem = page_to_dmem(vmf->page); struct nouveau_drm *drm = page_to_drm(vmf->page);
struct nouveau_drm *drm = dmem->drm; struct nouveau_dmem *dmem = drm->dmem;
struct nouveau_fence *fence; struct nouveau_fence *fence;
unsigned long src = 0, dst = 0; unsigned long src = 0, dst = 0;
dma_addr_t dma_addr = 0; dma_addr_t dma_addr = 0;
...@@ -211,131 +211,105 @@ static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = { ...@@ -211,131 +211,105 @@ static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
}; };
static int static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm) nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
{ {
struct nouveau_dmem_chunk *chunk; struct nouveau_dmem_chunk *chunk;
struct resource *res;
struct page *page;
void *ptr;
unsigned long i, pfn_first;
int ret; int ret;
if (drm->dmem == NULL) chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
return -EINVAL;
mutex_lock(&drm->dmem->mutex);
chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
struct nouveau_dmem_chunk,
list);
if (chunk == NULL) { if (chunk == NULL) {
mutex_unlock(&drm->dmem->mutex); ret = -ENOMEM;
return -ENOMEM; goto out;
} }
list_del(&chunk->list); /* Allocate unused physical address space for device private pages. */
mutex_unlock(&drm->dmem->mutex); res = request_free_mem_region(&iomem_resource, DMEM_CHUNK_SIZE,
"nouveau_dmem");
if (IS_ERR(res)) {
ret = PTR_ERR(res);
goto out_free;
}
chunk->drm = drm;
chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
chunk->pagemap.res = *res;
chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
chunk->pagemap.owner = drm->dev;
ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0, ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL, TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL,
&chunk->bo); &chunk->bo);
if (ret) if (ret)
goto out; goto out_release;
ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
if (ret) { if (ret)
nouveau_bo_ref(NULL, &chunk->bo); goto out_bo_free;
goto out;
}
bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES); ptr = memremap_pages(&chunk->pagemap, numa_node_id());
spin_lock_init(&chunk->lock); if (IS_ERR(ptr)) {
ret = PTR_ERR(ptr);
goto out_bo_unpin;
}
out:
mutex_lock(&drm->dmem->mutex); mutex_lock(&drm->dmem->mutex);
if (chunk->bo) list_add(&chunk->list, &drm->dmem->chunks);
list_add(&chunk->list, &drm->dmem->chunk_empty);
else
list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
mutex_unlock(&drm->dmem->mutex); mutex_unlock(&drm->dmem->mutex);
return ret; pfn_first = chunk->pagemap.res.start >> PAGE_SHIFT;
} page = pfn_to_page(pfn_first);
spin_lock(&drm->dmem->lock);
static struct nouveau_dmem_chunk * for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) {
nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm) page->zone_device_data = drm->dmem->free_pages;
{ drm->dmem->free_pages = page;
struct nouveau_dmem_chunk *chunk;
chunk = list_first_entry_or_null(&drm->dmem->chunk_free,
struct nouveau_dmem_chunk,
list);
if (chunk)
return chunk;
chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
struct nouveau_dmem_chunk,
list);
if (chunk->bo)
return chunk;
return NULL;
}
static int
nouveau_dmem_pages_alloc(struct nouveau_drm *drm,
unsigned long npages,
unsigned long *pages)
{
struct nouveau_dmem_chunk *chunk;
unsigned long c;
int ret;
memset(pages, 0xff, npages * sizeof(*pages));
mutex_lock(&drm->dmem->mutex);
for (c = 0; c < npages;) {
unsigned long i;
chunk = nouveau_dmem_chunk_first_free_locked(drm);
if (chunk == NULL) {
mutex_unlock(&drm->dmem->mutex);
ret = nouveau_dmem_chunk_alloc(drm);
if (ret) {
if (c)
return 0;
return ret;
}
mutex_lock(&drm->dmem->mutex);
continue;
}
spin_lock(&chunk->lock);
i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES);
while (i < DMEM_CHUNK_NPAGES && c < npages) {
pages[c] = chunk->pfn_first + i;
set_bit(i, chunk->bitmap);
chunk->callocated++;
c++;
i = find_next_zero_bit(chunk->bitmap,
DMEM_CHUNK_NPAGES, i);
}
spin_unlock(&chunk->lock);
} }
mutex_unlock(&drm->dmem->mutex); *ppage = page;
chunk->callocated++;
spin_unlock(&drm->dmem->lock);
NV_INFO(drm, "DMEM: registered %ldMB of device memory\n",
DMEM_CHUNK_SIZE >> 20);
return 0; return 0;
out_bo_unpin:
nouveau_bo_unpin(chunk->bo);
out_bo_free:
nouveau_bo_ref(NULL, &chunk->bo);
out_release:
release_mem_region(chunk->pagemap.res.start,
resource_size(&chunk->pagemap.res));
out_free:
kfree(chunk);
out:
return ret;
} }
static struct page * static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm) nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{ {
unsigned long pfns[1]; struct nouveau_dmem_chunk *chunk;
struct page *page; struct page *page = NULL;
int ret; int ret;
/* FIXME stop all the miss-match API ... */ spin_lock(&drm->dmem->lock);
ret = nouveau_dmem_pages_alloc(drm, 1, pfns); if (drm->dmem->free_pages) {
if (ret) page = drm->dmem->free_pages;
return NULL; drm->dmem->free_pages = page->zone_device_data;
chunk = nouveau_page_to_chunk(page);
chunk->callocated++;
spin_unlock(&drm->dmem->lock);
} else {
spin_unlock(&drm->dmem->lock);
ret = nouveau_dmem_chunk_alloc(drm, &page);
if (ret)
return NULL;
}
page = pfn_to_page(pfns[0]);
get_page(page); get_page(page);
lock_page(page); lock_page(page);
return page; return page;
...@@ -358,12 +332,7 @@ nouveau_dmem_resume(struct nouveau_drm *drm) ...@@ -358,12 +332,7 @@ nouveau_dmem_resume(struct nouveau_drm *drm)
return; return;
mutex_lock(&drm->dmem->mutex); mutex_lock(&drm->dmem->mutex);
list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { list_for_each_entry(chunk, &drm->dmem->chunks, list) {
ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
/* FIXME handle pin failure */
WARN_ON(ret);
}
list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
/* FIXME handle pin failure */ /* FIXME handle pin failure */
WARN_ON(ret); WARN_ON(ret);
...@@ -380,12 +349,8 @@ nouveau_dmem_suspend(struct nouveau_drm *drm) ...@@ -380,12 +349,8 @@ nouveau_dmem_suspend(struct nouveau_drm *drm)
return; return;
mutex_lock(&drm->dmem->mutex); mutex_lock(&drm->dmem->mutex);
list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { list_for_each_entry(chunk, &drm->dmem->chunks, list)
nouveau_bo_unpin(chunk->bo);
}
list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
nouveau_bo_unpin(chunk->bo); nouveau_bo_unpin(chunk->bo);
}
mutex_unlock(&drm->dmem->mutex); mutex_unlock(&drm->dmem->mutex);
} }
...@@ -399,15 +364,13 @@ nouveau_dmem_fini(struct nouveau_drm *drm) ...@@ -399,15 +364,13 @@ nouveau_dmem_fini(struct nouveau_drm *drm)
mutex_lock(&drm->dmem->mutex); mutex_lock(&drm->dmem->mutex);
WARN_ON(!list_empty(&drm->dmem->chunk_free)); list_for_each_entry_safe(chunk, tmp, &drm->dmem->chunks, list) {
WARN_ON(!list_empty(&drm->dmem->chunk_full)); nouveau_bo_unpin(chunk->bo);
nouveau_bo_ref(NULL, &chunk->bo);
list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) {
if (chunk->bo) {
nouveau_bo_unpin(chunk->bo);
nouveau_bo_ref(NULL, &chunk->bo);
}
list_del(&chunk->list); list_del(&chunk->list);
memunmap_pages(&chunk->pagemap);
release_mem_region(chunk->pagemap.res.start,
resource_size(&chunk->pagemap.res));
kfree(chunk); kfree(chunk);
} }
...@@ -493,9 +456,6 @@ nouveau_dmem_migrate_init(struct nouveau_drm *drm) ...@@ -493,9 +456,6 @@ nouveau_dmem_migrate_init(struct nouveau_drm *drm)
void void
nouveau_dmem_init(struct nouveau_drm *drm) nouveau_dmem_init(struct nouveau_drm *drm)
{ {
struct device *device = drm->dev->dev;
struct resource *res;
unsigned long i, size, pfn_first;
int ret; int ret;
/* This only make sense on PASCAL or newer */ /* This only make sense on PASCAL or newer */
...@@ -507,59 +467,16 @@ nouveau_dmem_init(struct nouveau_drm *drm) ...@@ -507,59 +467,16 @@ nouveau_dmem_init(struct nouveau_drm *drm)
drm->dmem->drm = drm; drm->dmem->drm = drm;
mutex_init(&drm->dmem->mutex); mutex_init(&drm->dmem->mutex);
INIT_LIST_HEAD(&drm->dmem->chunk_free); INIT_LIST_HEAD(&drm->dmem->chunks);
INIT_LIST_HEAD(&drm->dmem->chunk_full); mutex_init(&drm->dmem->mutex);
INIT_LIST_HEAD(&drm->dmem->chunk_empty); spin_lock_init(&drm->dmem->lock);
size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE);
/* Initialize migration dma helpers before registering memory */ /* Initialize migration dma helpers before registering memory */
ret = nouveau_dmem_migrate_init(drm); ret = nouveau_dmem_migrate_init(drm);
if (ret) if (ret) {
goto out_free; kfree(drm->dmem);
drm->dmem = NULL;
/*
* FIXME we need some kind of policy to decide how much VRAM we
* want to register with HMM. For now just register everything
* and latter if we want to do thing like over commit then we
* could revisit this.
*/
res = devm_request_free_mem_region(device, &iomem_resource, size);
if (IS_ERR(res))
goto out_free;
drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
drm->dmem->pagemap.res = *res;
drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops;
drm->dmem->pagemap.owner = drm->dev;
if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap)))
goto out_free;
pfn_first = res->start >> PAGE_SHIFT;
for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
struct nouveau_dmem_chunk *chunk;
struct page *page;
unsigned long j;
chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
if (chunk == NULL) {
nouveau_dmem_fini(drm);
return;
}
chunk->drm = drm;
chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES);
list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
page = pfn_to_page(chunk->pfn_first);
for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
page->zone_device_data = chunk;
} }
NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
return;
out_free:
kfree(drm->dmem);
drm->dmem = NULL;
} }
static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm, static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
...@@ -646,6 +563,9 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, ...@@ -646,6 +563,9 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
u64 *pfns; u64 *pfns;
int ret = -ENOMEM; int ret = -ENOMEM;
if (drm->dmem == NULL)
return -ENODEV;
args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL); args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL);
if (!args.src) if (!args.src)
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment