Commit 86586a41 authored by Jérôme Glisse, committed by Linus Torvalds

mm/hmm: remove HMM_PFN_READ flag and ignore peculiar architecture

Only peculiar architectures allow write without read, so assume that any
valid pfn allows read.  Note that we do not care about write-only because
it does not make sense with things like atomic compare-and-exchange or any
other operation that lets you get the memory value through it.

Link: http://lkml.kernel.org/r/20180323005527.758-8-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Evgeny Baskakov <ebaskakov@nvidia.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mark Hairgrove <mhairgrove@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 08232a45
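
The practical effect of dropping HMM_PFN_READ is that a valid entry now implies read permission, so a driver consuming the snapshot only has to test HMM_PFN_WRITE when it needs write access. A minimal caller-side sketch of that check, illustrative only and not part of this commit (the helper name is made up):

	static bool example_hmm_pfn_allows(hmm_pfn_t entry, bool for_write)
	{
		/* Entries without HMM_PFN_VALID carry no permission at all. */
		if (!(entry & HMM_PFN_VALID))
			return false;
		/* After this commit, HMM_PFN_VALID already implies read permission. */
		if (!for_write)
			return true;
		/* Write access still needs the explicit write flag. */
		return !!(entry & HMM_PFN_WRITE);
	}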
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -83,8 +83,7 @@ struct hmm;
  * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
  *
  * Flags:
- * HMM_PFN_VALID: pfn is valid
- * HMM_PFN_READ: CPU page table has read permission set
+ * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
  * HMM_PFN_WRITE: CPU page table has write permission set
  * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
  * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none()
@@ -97,13 +96,12 @@ struct hmm;
 typedef unsigned long hmm_pfn_t;
 
 #define HMM_PFN_VALID (1 << 0)
-#define HMM_PFN_READ (1 << 1)
-#define HMM_PFN_WRITE (1 << 2)
-#define HMM_PFN_ERROR (1 << 3)
-#define HMM_PFN_EMPTY (1 << 4)
-#define HMM_PFN_SPECIAL (1 << 5)
-#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6)
-#define HMM_PFN_SHIFT 7
+#define HMM_PFN_WRITE (1 << 1)
+#define HMM_PFN_ERROR (1 << 2)
+#define HMM_PFN_EMPTY (1 << 3)
+#define HMM_PFN_SPECIAL (1 << 4)
+#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 5)
+#define HMM_PFN_SHIFT 6
 
 /*
  * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t
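
With HMM_PFN_READ gone, the remaining flags pack into the low six bits, which is why HMM_PFN_SHIFT drops from 7 to 6: the pfn itself is stored above the flag bits. A rough sketch of that encoding, mirroring what helpers such as hmm_pfn_t_from_pfn() in this header do (reproduced for illustration; the example_ names are not real kernel symbols):

	/* Flags occupy bits [0, HMM_PFN_SHIFT); the pfn sits in the bits above. */
	static inline hmm_pfn_t example_pfn_to_hmm_pfn_t(unsigned long pfn)
	{
		return ((hmm_pfn_t)pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID;
	}

	static inline unsigned long example_hmm_pfn_t_to_pfn(hmm_pfn_t entry)
	{
		return entry >> HMM_PFN_SHIFT;
	}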
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -417,11 +417,9 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 	hmm_pfn_t *pfns = range->pfns;
 	unsigned long addr = start, i;
 	bool write_fault;
-	hmm_pfn_t flag;
 	pte_t *ptep;
 
 	i = (addr - range->start) >> PAGE_SHIFT;
-	flag = vma->vm_flags & VM_READ ? HMM_PFN_READ : 0;
 	write_fault = hmm_vma_walk->fault & hmm_vma_walk->write;
 
 again:
@@ -433,6 +431,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 
 	if (pmd_devmap(*pmdp) || pmd_trans_huge(*pmdp)) {
 		unsigned long pfn;
+		hmm_pfn_t flag = 0;
 		pmd_t pmd;
 
 		/*
@@ -497,7 +496,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 			} else if (write_fault)
 				goto fault;
 			pfns[i] |= HMM_PFN_DEVICE_UNADDRESSABLE;
-			pfns[i] |= flag;
 		} else if (is_migration_entry(entry)) {
 			if (hmm_vma_walk->fault) {
 				pte_unmap(ptep);
@@ -517,7 +515,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		if (write_fault && !pte_write(pte))
 			goto fault;
 
-		pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
+		pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte));
 		pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
 		continue;
 
@@ -534,7 +532,8 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 /*
  * hmm_vma_get_pfns() - snapshot CPU page table for a range of virtual addresses
  * @range: range being snapshotted
- * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, 0 success
+ * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid
+ * vma permission, 0 success
  *
  * This snapshots the CPU page table for a range of virtual addresses. Snapshot
  * validity is tracked by range struct. See hmm_vma_range_done() for further
@@ -573,6 +572,17 @@ int hmm_vma_get_pfns(struct hmm_range *range)
 	if (!hmm->mmu_notifier.ops)
 		return -EINVAL;
 
+	if (!(vma->vm_flags & VM_READ)) {
+		/*
+		 * If vma do not allow read access, then assume that it does
+		 * not allow write access, either. Architecture that allow
+		 * write without read access are not supported by HMM, because
+		 * operations such has atomic access would not work.
+		 */
+		hmm_pfns_clear(range->pfns, range->start, range->end);
+		return -EPERM;
+	}
+
 	/* Initialize range to track CPU page table update */
 	spin_lock(&hmm->lock);
 	range->valid = true;
@@ -686,6 +696,9 @@ EXPORT_SYMBOL(hmm_vma_range_done);
  *              goto retry;
  *          case 0:
  *              break;
+ *          case -ENOMEM:
+ *          case -EINVAL:
+ *          case -EPERM:
  *          default:
  *              // Handle error !
  *              up_read(&mm->mmap_sem)
@@ -727,11 +740,16 @@ int hmm_vma_fault(struct hmm_range *range, bool write, bool block)
 	if (!hmm->mmu_notifier.ops)
 		return -EINVAL;
 
-	/* Initialize range to track CPU page table update */
-	spin_lock(&hmm->lock);
-	range->valid = true;
-	list_add_rcu(&range->list, &hmm->ranges);
-	spin_unlock(&hmm->lock);
+	if (!(vma->vm_flags & VM_READ)) {
+		/*
+		 * If vma do not allow read access, then assume that it does
+		 * not allow write access, either. Architecture that allow
+		 * write without read access are not supported by HMM, because
+		 * operations such has atomic access would not work.
+		 */
+		hmm_pfns_clear(range->pfns, range->start, range->end);
+		return -EPERM;
+	}
 
 	/* FIXME support hugetlb fs */
 	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
@@ -739,6 +757,12 @@ int hmm_vma_fault(struct hmm_range *range, bool write, bool block)
 		return 0;
 	}
 
+	/* Initialize range to track CPU page table update */
+	spin_lock(&hmm->lock);
+	range->valid = true;
+	list_add_rcu(&range->list, &hmm->ranges);
+	spin_unlock(&hmm->lock);
+
 	hmm_vma_walk.fault = true;
 	hmm_vma_walk.write = write;
 	hmm_vma_walk.block = block;
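
Because a vma without VM_READ now fails up front, callers of hmm_vma_get_pfns() and hmm_vma_fault() gain one more error to handle: -EPERM means the vma's permissions rule out mirroring, not a transient condition, so retrying is pointless. A hedged caller-side sketch, not taken from the kernel tree (example_mirror_range() and its surrounding driver context are assumptions):

	static int example_mirror_range(struct hmm_range *range, bool write)
	{
		int ret;

		ret = hmm_vma_fault(range, write, true /* block */);
		switch (ret) {
		case 0:
			return 0;	/* pages faulted in, range->pfns is usable */
		case -EPERM:
			/* vma lacks VM_READ: permanent condition, do not retry */
			return ret;
		default:
			/* -EAGAIN, -EINVAL, -ENOMEM, ... handled as before */
			return ret;
		}
	}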