Commit 678e174c authored by Aneesh Kumar K.V, committed by Linus Torvalds

powerpc/mm/iommu: allow migration of cma allocated pages during mm_iommu_do_alloc

The current code doesn't do page migration if the page allocated is a
compound page.  With HugeTLB migration support, we can end up allocating
hugetlb pages from the CMA region.  THP pages can also be allocated from
the CMA region.  This patch updates the code to handle compound pages
correctly.  The patch also switches to a single get_user_pages() call
with the right count, instead of doing one get_user_pages() per page,
which avoids reading the page table multiple times.  This is done via
get_user_pages_longterm(), which also takes care of DAX-backed pages.
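
As a rough sketch of the new pinning pattern (not code from the patch;
pin_user_range() is a hypothetical helper, while mmap_sem,
get_user_pages_longterm(), FOLL_WRITE and put_page() are the locking and
GUP interfaces of this kernel generation), the single-call approach
looks like:

#include <linux/mm.h>
#include <linux/errno.h>

/*
 * Minimal sketch: pin 'entries' pages starting at userspace address 'ua'
 * with one get_user_pages_longterm() call, instead of looping over
 * get_user_pages_fast() one page at a time.  Error handling simplified.
 */
static long pin_user_range(struct mm_struct *mm, unsigned long ua,
                           unsigned long entries, struct page **pages)
{
        long pinned, i;

        down_read(&mm->mmap_sem);
        pinned = get_user_pages_longterm(ua, entries, FOLL_WRITE, pages, NULL);
        up_read(&mm->mmap_sem);

        if (pinned != entries) {
                /* Drop the references we did take, then fail the whole request. */
                for (i = 0; i < pinned; i++)
                        put_page(pages[i]);
                return -EFAULT;
        }

        return pinned;
}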

The lifetime of DAX pages is dictated by file system rules and, as such,
we need to make sure that we free these pages on operations like truncate
and punch hole.  If we have a long-term pin on these pages, which are
mostly returned to userspace with an elevated page count, the entity
holding the long-term pin may not be aware that the file got truncated
and that the file system blocks possibly got reused.  That can result in
corruption.

The patch also converts the hpas member of mm_iommu_table_group_mem_t to
a union, so the same storage location can hold pointers to struct page.
We cannot update all the code paths to use struct page *, because hpas is
accessed in real mode and the struct page * to pfn conversion can't be
done there.
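
A minimal sketch of that union idea follows (illustrative only:
mem_region and convert_to_hpas() are hypothetical names, while the union
layout and the page_to_pfn() conversion mirror the patch).  The array is
filled with struct page pointers while pinning, then each slot is
overwritten in place with the host physical address that real-mode code
reads:

#include <linux/mm.h>
#include <linux/types.h>

/* Illustrative stand-in for mm_iommu_table_group_mem_t. */
struct mem_region {
        u64 entries;                    /* number of entries in hpas/hpages[] */
        union {
                struct page **hpages;   /* valid only while pinning pages */
                phys_addr_t *hpas;      /* what real-mode lookups read */
        };
};

/* Replace each struct page pointer with the corresponding physical address. */
static void convert_to_hpas(struct mem_region *mem)
{
        u64 i;

        for (i = 0; i < mem->entries; i++) {
                struct page *page = mem->hpages[i];

                /* Overwrite the same slot: no second array is needed. */
                mem->hpas[i] = (phys_addr_t)page_to_pfn(page) << PAGE_SHIFT;
        }
}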

[aneesh.kumar@linux.ibm.com: address review feedback, update changelog]
  Link: http://lkml.kernel.org/r/20190227144736.5872-4-aneesh.kumar@linux.ibm.com
Link: http://lkml.kernel.org/r/20190114095438.32470-5-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 9a4e9f3b
@@ -21,6 +21,7 @@
 #include <linux/sizes.h>
 #include <asm/mmu_context.h>
 #include <asm/pte-walk.h>
+#include <linux/mm_inline.h>

 static DEFINE_MUTEX(mem_list_mutex);
@@ -34,8 +35,18 @@ struct mm_iommu_table_group_mem_t {
         atomic64_t mapped;
         unsigned int pageshift;
         u64 ua;                 /* userspace address */
-        u64 entries;            /* number of entries in hpas[] */
-        u64 *hpas;              /* vmalloc'ed */
+        u64 entries;            /* number of entries in hpas/hpages[] */
+        /*
+         * in mm_iommu_get we temporarily use this to store
+         * struct page address.
+         *
+         * We need to convert ua to hpa in real mode. Make it
+         * simpler by storing physical address.
+         */
+        union {
+                struct page **hpages;   /* vmalloc'ed */
+                phys_addr_t *hpas;
+        };
 #define MM_IOMMU_TABLE_INVALID_HPA      ((uint64_t)-1)
         u64 dev_hpa;            /* Device memory base address */
 };
@@ -80,64 +91,15 @@ bool mm_iommu_preregistered(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mm_iommu_preregistered);

-/*
- * Taken from alloc_migrate_target with changes to remove CMA allocations
- */
-struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
-{
-        gfp_t gfp_mask = GFP_USER;
-        struct page *new_page;
-
-        if (PageCompound(page))
-                return NULL;
-
-        if (PageHighMem(page))
-                gfp_mask |= __GFP_HIGHMEM;
-
-        /*
-         * We don't want the allocation to force an OOM if possibe
-         */
-        new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
-        return new_page;
-}
-
-static int mm_iommu_move_page_from_cma(struct page *page)
-{
-        int ret = 0;
-        LIST_HEAD(cma_migrate_pages);
-
-        /* Ignore huge pages for now */
-        if (PageCompound(page))
-                return -EBUSY;
-
-        lru_add_drain();
-        ret = isolate_lru_page(page);
-        if (ret)
-                return ret;
-
-        list_add(&page->lru, &cma_migrate_pages);
-        put_page(page); /* Drop the gup reference */
-
-        ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
-                                NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
-        if (ret) {
-                if (!list_empty(&cma_migrate_pages))
-                        putback_movable_pages(&cma_migrate_pages);
-        }
-
-        return 0;
-}
-
 static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
                               unsigned long entries, unsigned long dev_hpa,
                               struct mm_iommu_table_group_mem_t **pmem)
 {
         struct mm_iommu_table_group_mem_t *mem;
-        long i, j, ret = 0, locked_entries = 0;
+        long i, ret, locked_entries = 0;
         unsigned int pageshift;
         unsigned long flags;
         unsigned long cur_ua;
-        struct page *page = NULL;

         mutex_lock(&mem_list_mutex);
@@ -187,41 +149,25 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
                 goto unlock_exit;
         }

+        down_read(&mm->mmap_sem);
+        ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
+        up_read(&mm->mmap_sem);
+        if (ret != entries) {
+                /* free the reference taken */
+                for (i = 0; i < ret; i++)
+                        put_page(mem->hpages[i]);
+                vfree(mem->hpas);
+                kfree(mem);
+                ret = -EFAULT;
+                goto unlock_exit;
+        }
+
+        pageshift = PAGE_SHIFT;
         for (i = 0; i < entries; ++i) {
+                struct page *page = mem->hpages[i];
+
                 cur_ua = ua + (i << PAGE_SHIFT);
-                if (1 != get_user_pages_fast(cur_ua,
-                                        1/* pages */, 1/* iswrite */, &page)) {
-                        ret = -EFAULT;
-                        for (j = 0; j < i; ++j)
-                                put_page(pfn_to_page(mem->hpas[j] >>
-                                                PAGE_SHIFT));
-                        vfree(mem->hpas);
-                        kfree(mem);
-                        goto unlock_exit;
-                }
-                /*
-                 * If we get a page from the CMA zone, since we are going to
-                 * be pinning these entries, we might as well move them out
-                 * of the CMA zone if possible. NOTE: faulting in + migration
-                 * can be expensive. Batching can be considered later
-                 */
-                if (is_migrate_cma_page(page)) {
-                        if (mm_iommu_move_page_from_cma(page))
-                                goto populate;
-                        if (1 != get_user_pages_fast(cur_ua,
-                                                1/* pages */, 1/* iswrite */,
-                                                &page)) {
-                                ret = -EFAULT;
-                                for (j = 0; j < i; ++j)
-                                        put_page(pfn_to_page(mem->hpas[j] >>
-                                                                PAGE_SHIFT));
-                                vfree(mem->hpas);
-                                kfree(mem);
-                                goto unlock_exit;
-                        }
-                }
-populate:
-                pageshift = PAGE_SHIFT;
                 if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) {
                         pte_t *pte;
                         struct page *head = compound_head(page);
@@ -239,10 +185,15 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
                         local_irq_restore(flags);
                 }
                 mem->pageshift = min(mem->pageshift, pageshift);
+                /*
+                 * We don't need struct page reference any more, switch
+                 * to physical address.
+                 */
                 mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
         }

 good_exit:
+        ret = 0;
         atomic64_set(&mem->mapped, 1);
         mem->used = 1;
         mem->ua = ua;
...