Commit 4003f107 authored by Logan Gunthorpe, committed by Jens Axboe

mm: introduce FOLL_PCI_P2PDMA to gate getting PCI P2PDMA pages

GUP Callers that expect PCI P2PDMA pages can now set FOLL_PCI_P2PDMA to
allow obtaining P2PDMA pages. If GUP is called without the flag and a
P2PDMA page is found, it will return an error in try_grab_page() or
try_grab_folio().

The check is safe to do before taking the reference to the page in both
cases, since the page should be protected either by the appropriate
ptl or mmap_lock, or by the gup fast guarantees preventing TLB flushes.

try_grab_folio() has one call site that WARNs on failure and cannot
actually deal with the failure of this function (it seems it will
get into an infinite loop). Expand the comment there to document a
couple more conditions on why it will not fail.

FOLL_PCI_P2PDMA cannot be set if FOLL_LONGTERM is set. This mirrors the
existing fsdax restriction until pgmap refcounts are fixed (see the link
below for more information).

Link: https://lkml.kernel.org/r/Yy4Ot5MoOhsgYLTQ@ziepe.ca
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20221021174116.7200-3-logang@deltatee.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 0f089235
...@@ -2958,6 +2958,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, ...@@ -2958,6 +2958,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ #define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */
#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ #define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */
#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */
/* /*
* FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
......
...@@ -123,6 +123,9 @@ static inline struct folio *try_get_folio(struct page *page, int refs) ...@@ -123,6 +123,9 @@ static inline struct folio *try_get_folio(struct page *page, int refs)
*/ */
struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
{ {
if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
return NULL;
if (flags & FOLL_GET) if (flags & FOLL_GET)
return try_get_folio(page, refs); return try_get_folio(page, refs);
else if (flags & FOLL_PIN) { else if (flags & FOLL_PIN) {
...@@ -216,6 +219,9 @@ int __must_check try_grab_page(struct page *page, unsigned int flags) ...@@ -216,6 +219,9 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
return -ENOMEM; return -ENOMEM;
if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))
return -EREMOTEIO;
if (flags & FOLL_GET) if (flags & FOLL_GET)
folio_ref_inc(folio); folio_ref_inc(folio);
else if (flags & FOLL_PIN) { else if (flags & FOLL_PIN) {
...@@ -631,6 +637,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, ...@@ -631,6 +637,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
page = ERR_PTR(ret); page = ERR_PTR(ret);
goto out; goto out;
} }
/* /*
* We need to make the page accessible if and only if we are going * We need to make the page accessible if and only if we are going
* to access its content (the FOLL_PIN case). Please see * to access its content (the FOLL_PIN case). Please see
...@@ -1060,6 +1067,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) ...@@ -1060,6 +1067,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if ((gup_flags & FOLL_LONGTERM) && (gup_flags & FOLL_PCI_P2PDMA))
return -EOPNOTSUPP;
if (vma_is_secretmem(vma)) if (vma_is_secretmem(vma))
return -EFAULT; return -EFAULT;
...@@ -2536,6 +2546,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, ...@@ -2536,6 +2546,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
undo_dev_pagemap(nr, nr_start, flags, pages); undo_dev_pagemap(nr, nr_start, flags, pages);
break; break;
} }
if (!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
undo_dev_pagemap(nr, nr_start, flags, pages);
break;
}
SetPageReferenced(page); SetPageReferenced(page);
pages[*nr] = page; pages[*nr] = page;
if (unlikely(try_grab_page(page, flags))) { if (unlikely(try_grab_page(page, flags))) {
...@@ -3020,7 +3036,8 @@ static int internal_get_user_pages_fast(unsigned long start, ...@@ -3020,7 +3036,8 @@ static int internal_get_user_pages_fast(unsigned long start,
if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
FOLL_FORCE | FOLL_PIN | FOLL_GET | FOLL_FORCE | FOLL_PIN | FOLL_GET |
FOLL_FAST_ONLY | FOLL_NOFAULT))) FOLL_FAST_ONLY | FOLL_NOFAULT |
FOLL_PCI_P2PDMA)))
return -EINVAL; return -EINVAL;
if (gup_flags & FOLL_PIN) if (gup_flags & FOLL_PIN)
......
...@@ -6361,8 +6361,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -6361,8 +6361,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
* tables. If the huge page is present, then the tail * tables. If the huge page is present, then the tail
* pages must also be present. The ptl prevents the * pages must also be present. The ptl prevents the
* head page and tail pages from being rearranged in * head page and tail pages from being rearranged in
* any way. So this page must be available at this * any way. As this is hugetlb, the pages will never
* point, unless the page refcount overflowed: * be p2pdma or not longterm pinable. So this page
* must be available at this point, unless the page
* refcount overflowed:
*/ */
if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs, if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs,
flags))) { flags))) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment