Commit 26a8ea80 authored by Steve Sistare's avatar Steve Sistare Committed by Andrew Morton

mm/hugetlb: fix memfd_pin_folios resv_huge_pages leak

memfd_pin_folios followed by unpin_folios leaves resv_huge_pages elevated
if the pages were not already faulted in.  During a normal page fault,
resv_huge_pages is consumed here:

hugetlb_fault()
  alloc_hugetlb_folio()
    dequeue_hugetlb_folio_vma()
      dequeue_hugetlb_folio_nodemask()
        dequeue_hugetlb_folio_node_exact()
          free_huge_pages--
      resv_huge_pages--

During memfd_pin_folios, the page is created by calling
alloc_hugetlb_folio_nodemask instead of alloc_hugetlb_folio, and
resv_huge_pages is not modified:

memfd_alloc_folio()
  alloc_hugetlb_folio_nodemask()
    dequeue_hugetlb_folio_nodemask()
      dequeue_hugetlb_folio_node_exact()
        free_huge_pages--

alloc_hugetlb_folio_nodemask has other callers that must not modify
resv_huge_pages.  Therefore, to fix, define an alternate version of
alloc_hugetlb_folio_nodemask for this call site that adjusts
resv_huge_pages.

Link: https://lkml.kernel.org/r/1725373521-451395-4-git-send-email-steven.sistare@oracle.com
Fixes: 89c1905d ("mm/gup: introduce memfd_pin_folios() for pinning memfd folios")
Signed-off-by: default avatarSteve Sistare <steven.sistare@oracle.com>
Acked-by: default avatarVivek Kasireddy <vivek.kasireddy@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent c56b6f3d
......@@ -692,6 +692,9 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask,
bool allow_alloc_fallback);
struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
......@@ -1059,6 +1062,13 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
return NULL;
}
static inline struct folio *
alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask)
{
return NULL;
}
static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask,
......
......@@ -2390,6 +2390,23 @@ struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
return folio;
}
struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask)
{
struct folio *folio;
spin_lock_irq(&hugetlb_lock);
folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid,
nmask);
if (folio) {
VM_BUG_ON(!h->resv_huge_pages);
h->resv_huge_pages--;
}
spin_unlock_irq(&hugetlb_lock);
return folio;
}
/* folio migration callback function */
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback)
......
......@@ -82,11 +82,10 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
gfp_mask = htlb_alloc_mask(hstate_file(memfd));
gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
folio = alloc_hugetlb_folio_nodemask(hstate_file(memfd),
numa_node_id(),
NULL,
gfp_mask,
false);
folio = alloc_hugetlb_folio_reserve(hstate_file(memfd),
numa_node_id(),
NULL,
gfp_mask);
if (folio && folio_try_get(folio)) {
err = hugetlb_add_to_page_cache(folio,
memfd->f_mapping,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment