Commit d0ce0e47 authored by Sidhartha Kumar, committed by Andrew Morton

mm/hugetlb: convert hugetlb fault paths to use alloc_hugetlb_folio()

Change alloc_huge_page() to alloc_hugetlb_folio() by converting all callers
to handle the function's new folio return type.  In this conversion,
alloc_huge_page_vma() is also changed to alloc_hugetlb_folio_vma() and
hugepage_add_new_anon_rmap() is changed to take in a folio directly.  Many
additions of '&folio->page' are cleaned up in subsequent patches.

hugetlbfs_fallocate() is also refactored to use the RCU +
page_cache_next_miss() API.

Link: https://lkml.kernel.org/r/20230125170537.96973-5-sidhartha.kumar@oracle.com
Suggested-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent ea8e72f4
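
As a quick orientation before the diffs, the sketch below shows the shape a converted fault-path caller takes after this change. It is illustrative only and not part of the commit: the helper name example_hugetlb_alloc() is invented, error handling is reduced to a minimum, and the '&folio->page' bridge shown here is exactly the kind of call site that later patches in the series clean up.

#include <linux/err.h>
#include <linux/hugetlb.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/* Hypothetical caller, for illustration only (not from this commit). */
static int example_hugetlb_alloc(struct vm_area_struct *vma,
				 unsigned long addr, struct hstate *h)
{
	struct folio *folio;

	/* Callers now receive a folio (or an ERR_PTR) instead of a page. */
	folio = alloc_hugetlb_folio(vma, addr, 0);
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	/* '&folio->page' bridges interfaces that still take a struct page. */
	clear_huge_page(&folio->page, addr, pages_per_huge_page(h));
	/* Folio helpers replace the old page-flag calls. */
	__folio_mark_uptodate(folio);

	folio_put(folio);	/* drop the reference from alloc_hugetlb_folio() */
	return 0;
}
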
fs/hugetlbfs/inode.c
@@ -819,8 +819,9 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		 * This is supposed to be the vaddr where the page is being
		 * faulted in, but we have no vaddr here.
		 */
-		struct page *page;
+		struct folio *folio;
 		unsigned long addr;
+		bool present;
 		cond_resched();
@@ -844,48 +845,49 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 		/* See if already present in mapping to avoid alloc/free */
-		page = find_get_page(mapping, index);
-		if (page) {
-			put_page(page);
+		rcu_read_lock();
+		present = page_cache_next_miss(mapping, index, 1) != index;
+		rcu_read_unlock();
+		if (present) {
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 			hugetlb_drop_vma_policy(&pseudo_vma);
 			continue;
 		}
 		/*
-		 * Allocate page without setting the avoid_reserve argument.
+		 * Allocate folio without setting the avoid_reserve argument.
 		 * There certainly are no reserves associated with the
 		 * pseudo_vma. However, there could be shared mappings with
 		 * reserves for the file at the inode level. If we fallocate
-		 * pages in these areas, we need to consume the reserves
+		 * folios in these areas, we need to consume the reserves
 		 * to keep reservation accounting consistent.
 		 */
-		page = alloc_huge_page(&pseudo_vma, addr, 0);
+		folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0);
 		hugetlb_drop_vma_policy(&pseudo_vma);
-		if (IS_ERR(page)) {
+		if (IS_ERR(folio)) {
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-			error = PTR_ERR(page);
+			error = PTR_ERR(folio);
 			goto out;
 		}
-		clear_huge_page(page, addr, pages_per_huge_page(h));
-		__SetPageUptodate(page);
-		error = hugetlb_add_to_page_cache(page, mapping, index);
+		clear_huge_page(&folio->page, addr, pages_per_huge_page(h));
+		__folio_mark_uptodate(folio);
+		error = hugetlb_add_to_page_cache(&folio->page, mapping, index);
 		if (unlikely(error)) {
-			restore_reserve_on_error(h, &pseudo_vma, addr, page);
-			put_page(page);
+			restore_reserve_on_error(h, &pseudo_vma, addr, &folio->page);
+			folio_put(folio);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 			goto out;
 		}
 		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-		SetHPageMigratable(page);
+		folio_set_hugetlb_migratable(folio);
 		/*
-		 * unlock_page because locked by hugetlb_add_to_page_cache()
-		 * put_page() due to reference from alloc_huge_page()
+		 * folio_unlock because locked by hugetlb_add_to_page_cache()
+		 * folio_put() due to reference from alloc_hugetlb_folio()
 		 */
-		unlock_page(page);
-		put_page(page);
+		folio_unlock(folio);
+		folio_put(folio);
 	}
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
include/linux/hugetlb.h
@@ -717,11 +717,11 @@ struct huge_bootmem_page {
 };
 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
-struct page *alloc_huge_page(struct vm_area_struct *vma,
+struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 				unsigned long addr, int avoid_reserve);
 struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
 				nodemask_t *nmask, gfp_t gfp_mask);
-struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
+struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
 				unsigned long address);
 int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
@@ -1033,7 +1033,7 @@ static inline int isolate_or_dissolve_huge_page(struct page *page,
 	return -ENOMEM;
 }
-static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
+static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 					   unsigned long addr,
 					   int avoid_reserve)
 {
@@ -1047,7 +1047,7 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
 	return NULL;
 }
-static inline struct page *alloc_huge_page_vma(struct hstate *h,
+static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
 					       struct vm_area_struct *vma,
 					       unsigned long address)
 {
......@@ -203,7 +203,7 @@ void page_remove_rmap(struct page *, struct vm_area_struct *,
void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long address, rmap_t flags);
void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
unsigned long address);
static inline void __page_dup_rmap(struct page *page, bool compound)
(The diff for mm/hugetlb.c is collapsed in this view.)
mm/mempolicy.c
@@ -1210,9 +1210,11 @@ static struct page *new_page(struct page *page, unsigned long start)
 			break;
 	}
-	if (folio_test_hugetlb(src))
-		return alloc_huge_page_vma(page_hstate(&src->page),
+	if (folio_test_hugetlb(src)) {
+		dst = alloc_hugetlb_folio_vma(folio_hstate(src),
 				vma, address);
+		return &dst->page;
+	}
 	if (folio_test_large(src))
 		gfp = GFP_TRANSHUGE;
mm/rmap.c
@@ -2534,15 +2534,13 @@ void hugepage_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
 			 !!(flags & RMAP_EXCLUSIVE));
 }
-void hugepage_add_new_anon_rmap(struct page *page,
+void hugepage_add_new_anon_rmap(struct folio *folio,
 			struct vm_area_struct *vma, unsigned long address)
 {
-	struct folio *folio = page_folio(page);
 	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	/* increment count (starts at -1) */
 	atomic_set(&folio->_entire_mapcount, 0);
 	folio_clear_hugetlb_restore_reserve(folio);
-	__page_set_anon_rmap(folio, page, vma, address, 1);
+	__page_set_anon_rmap(folio, &folio->page, vma, address, 1);
 }
 #endif /* CONFIG_HUGETLB_PAGE */