Commit 9049771f authored by Dan Williams's avatar Dan Williams

mm: fix cache mode of dax pmd mappings

track_pfn_insert() in vmf_insert_pfn_pmd() is marking dax mappings as
uncacheable rendering them impractical for application usage.  DAX-pte
mappings are cached and the goal of establishing DAX-pmd mappings is to
attain more performance, not dramatically less (3 orders of magnitude).

track_pfn_insert() relies on a previous call to reserve_memtype() to
establish the expected page_cache_mode for the range.  While memremap()
arranges for reserve_memtype() to be called, devm_memremap_pages() does
not.  So, teach track_pfn_insert() and untrack_pfn() how to handle
tracking without a vma, and arrange for devm_memremap_pages() to
establish the write-back-cache reservation in the memtype tree.

Cc: <stable@vger.kernel.org>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Nilesh Choudhury <nilesh.choudhury@oracle.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: default avatarToshi Kani <toshi.kani@hpe.com>
Reported-by: default avatarKai Zhang <kai.ka.zhang@oracle.com>
Acked-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
parent ca120cf6
...@@ -927,9 +927,10 @@ int track_pfn_copy(struct vm_area_struct *vma) ...@@ -927,9 +927,10 @@ int track_pfn_copy(struct vm_area_struct *vma)
} }
/* /*
* prot is passed in as a parameter for the new mapping. If the vma has a * prot is passed in as a parameter for the new mapping. If the vma has
* linear pfn mapping for the entire range reserve the entire vma range with * a linear pfn mapping for the entire range, or no vma is provided,
* single reserve_pfn_range call. * reserve the entire pfn + size range with single reserve_pfn_range
* call.
*/ */
int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long addr, unsigned long size) unsigned long pfn, unsigned long addr, unsigned long size)
...@@ -938,11 +939,12 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, ...@@ -938,11 +939,12 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
enum page_cache_mode pcm; enum page_cache_mode pcm;
/* reserve the whole chunk starting from paddr */ /* reserve the whole chunk starting from paddr */
if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) { if (!vma || (addr == vma->vm_start
&& size == (vma->vm_end - vma->vm_start))) {
int ret; int ret;
ret = reserve_pfn_range(paddr, size, prot, 0); ret = reserve_pfn_range(paddr, size, prot, 0);
if (!ret) if (ret == 0 && vma)
vma->vm_flags |= VM_PAT; vma->vm_flags |= VM_PAT;
return ret; return ret;
} }
...@@ -997,7 +999,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, ...@@ -997,7 +999,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
resource_size_t paddr; resource_size_t paddr;
unsigned long prot; unsigned long prot;
if (!(vma->vm_flags & VM_PAT)) if (vma && !(vma->vm_flags & VM_PAT))
return; return;
/* free the chunk starting from pfn or the whole chunk */ /* free the chunk starting from pfn or the whole chunk */
...@@ -1011,6 +1013,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, ...@@ -1011,6 +1013,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
size = vma->vm_end - vma->vm_start; size = vma->vm_end - vma->vm_start;
} }
free_pfn_range(paddr, size); free_pfn_range(paddr, size);
if (vma)
vma->vm_flags &= ~VM_PAT; vma->vm_flags &= ~VM_PAT;
} }
......
...@@ -247,6 +247,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data) ...@@ -247,6 +247,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
align_start = res->start & ~(SECTION_SIZE - 1); align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE); align_size = ALIGN(resource_size(res), SECTION_SIZE);
arch_remove_memory(align_start, align_size); arch_remove_memory(align_start, align_size);
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
pgmap_radix_release(res); pgmap_radix_release(res);
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
"%s: failed to free all reserved pages\n", __func__); "%s: failed to free all reserved pages\n", __func__);
...@@ -282,6 +283,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, ...@@ -282,6 +283,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap) struct percpu_ref *ref, struct vmem_altmap *altmap)
{ {
resource_size_t key, align_start, align_size, align_end; resource_size_t key, align_start, align_size, align_end;
pgprot_t pgprot = PAGE_KERNEL;
struct dev_pagemap *pgmap; struct dev_pagemap *pgmap;
struct page_map *page_map; struct page_map *page_map;
int error, nid, is_ram; int error, nid, is_ram;
...@@ -351,6 +353,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, ...@@ -351,6 +353,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (nid < 0) if (nid < 0)
nid = numa_mem_id(); nid = numa_mem_id();
error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
align_size);
if (error)
goto err_pfn_remap;
error = arch_add_memory(nid, align_start, align_size, true); error = arch_add_memory(nid, align_start, align_size, true);
if (error) if (error)
goto err_add_memory; goto err_add_memory;
...@@ -371,6 +378,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, ...@@ -371,6 +378,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
return __va(res->start); return __va(res->start);
err_add_memory: err_add_memory:
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
err_pfn_remap:
err_radix: err_radix:
pgmap_radix_release(res); pgmap_radix_release(res);
devres_free(page_map); devres_free(page_map);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment