Commit e9fdff87 authored by Muchun Song, committed by Linus Torvalds

mm: hugetlb: add a kernel parameter hugetlb_free_vmemmap

Add a kernel parameter hugetlb_free_vmemmap to enable the feature of
freeing unused vmemmap pages associated with each hugetlb page on boot.

We disable PMD mapping of vmemmap pages on x86-64 when this feature is
enabled, because vmemmap_remap_free() depends on the vmemmap being
base-page mapped.
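
For scale, a back-of-the-envelope sketch (assuming 4 KiB base pages and a
64-byte struct page, both typical on x86-64 but not stated in this patch):
a 2MB HugeTLB page spans 512 base pages, whose struct pages fill
512 * 64 bytes = 8 vmemmap pages; with RESERVE_VMEMMAP_NR = 2 of those
kept, 6 become freeable, which is where the "6 * PAGE_SIZE" in the
documentation below comes from.

    /* Illustrative userspace arithmetic only, not kernel code. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long page_size = 4096;     /* assumed base page size */
        unsigned long struct_page = 64;     /* assumed sizeof(struct page) */
        unsigned long hpage = 2UL << 20;    /* 2MB HugeTLB page */
        unsigned long reserve = 2;          /* RESERVE_VMEMMAP_NR */

        /* 512 base pages -> 32 KiB of struct pages -> 8 vmemmap pages */
        unsigned long vmemmap = hpage / page_size * struct_page / page_size;

        printf("vmemmap pages: %lu, freeable: %lu\n", vmemmap, vmemmap - reserve);
        return 0;
    }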

Link: https://lkml.kernel.org/r/20210510030027.56044-8-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Barry Song <song.bao.hua@hisilicon.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Tested-by: Chen Huang <chenhuang5@huawei.com>
Tested-by: Bodeddula Balasubramaniam <bodeddub@amazon.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: HORIGUCHI NAOYA <naoya.horiguchi@nec.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Oliver Neukum <oneukum@suse.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent ad2fa371
Documentation/admin-guide/kernel-parameters.txt
@@ -1567,6 +1567,23 @@
 			Documentation/admin-guide/mm/hugetlbpage.rst.
 			Format: size[KMG]
 
+	hugetlb_free_vmemmap=
+			[KNL] Requires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+			enabled.
+			Allows heavy hugetlb users to free up some more
+			memory (6 * PAGE_SIZE for each 2MB hugetlb page).
+			This feature is not free though. Large page
+			tables are not used to back vmemmap pages which
+			can lead to a performance degradation for some
+			workloads. Also there will be memory allocation
+			required when hugetlb pages are freed from the
+			pool which can lead to corner cases under heavy
+			memory pressure.
+			Format: { on | off (default) }
+
+			on:  enable the feature
+			off: disable the feature
+
 	hung_task_panic=
 			[KNL] Should the hung task detector generate panics.
 			Format: 0 | 1
...
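
As a usage illustration (not part of the patch): this is a boot-time
parameter, so it goes on the kernel command line. A sketch assuming a
stock GRUB setup; the file path and variable are the usual GRUB
conventions, not something this patch defines:

    # /etc/default/grub (illustrative)
    GRUB_CMDLINE_LINUX="... hugetlb_free_vmemmap=on"

After regenerating the GRUB configuration and rebooting, the value is
parsed by early_hugetlb_free_vmemmap_param() in mm/hugetlb_vmemmap.c
(see below).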
Documentation/admin-guide/mm/hugetlbpage.rst
@@ -153,6 +153,9 @@ default_hugepagesz
 	will all result in 256 2M huge pages being allocated.  Valid default
 	huge page size is architecture dependent.
+hugetlb_free_vmemmap
+	When CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is set, this enables freeing
+	unused vmemmap pages associated with each HugeTLB page.
 
 When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages``
 indicates the current number of pre-allocated huge pages of the default size.
...
arch/x86/mm/init_64.c
@@ -34,6 +34,7 @@
 #include <linux/gfp.h>
 #include <linux/kcore.h>
 #include <linux/bootmem_info.h>
+#include <linux/hugetlb.h>
 
 #include <asm/processor.h>
 #include <asm/bios_ebda.h>
@@ -1609,7 +1610,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
 	VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
 
-	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
+	if ((is_hugetlb_free_vmemmap_enabled() && !altmap) ||
+	    end - start < PAGES_PER_SECTION * sizeof(struct page))
 		err = vmemmap_populate_basepages(start, end, node, NULL);
 	else if (boot_cpu_has(X86_FEATURE_PSE))
 		err = vmemmap_populate_hugepages(start, end, node, altmap);
@@ -1637,6 +1639,8 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 	pmd_t *pmd;
 	unsigned int nr_pmd_pages;
 	struct page *page;
+	bool base_mapping = !boot_cpu_has(X86_FEATURE_PSE) ||
+			    is_hugetlb_free_vmemmap_enabled();
 
 	for (; addr < end; addr = next) {
 		pte_t *pte = NULL;
@@ -1662,7 +1666,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 	}
 	get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
 
-	if (!boot_cpu_has(X86_FEATURE_PSE)) {
+	if (base_mapping) {
 		next = (addr + PAGE_SIZE) & PAGE_MASK;
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd))
...
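
Taken together, the two hunks above make vmemmap_populate() and
register_page_bootmem_memmap() agree on mapping granularity: with the
feature on (and no altmap in use), the vmemmap must be PTE-mapped so
that vmemmap_remap_free() can later operate on individual base pages.
A condensed, hypothetical sketch of that decision (parameter names are
made up; the altmap error path is omitted):

    #include <stdbool.h>
    #include <stdio.h>

    /* Sketch of the decision the hunks above encode, not kernel code. */
    static bool vmemmap_uses_basepages(bool free_vmemmap_on, bool has_altmap,
                                       bool cpu_has_pse, unsigned long range,
                                       unsigned long section_memmap)
    {
        if (free_vmemmap_on && !has_altmap)
            return true;       /* vmemmap_remap_free() needs PTE mappings */
        if (range < section_memmap)
            return true;       /* small ranges were PTE-mapped already */
        return !cpu_has_pse;   /* no PSE, no 2MB mappings at all */
    }

    int main(void)
    {
        /* feature on, no altmap, PSE present: base pages win anyway */
        printf("%d\n", vmemmap_uses_basepages(true, false, true,
                                              1 << 21, 1 << 21));
        return 0;
    }

The new base_mapping flag in the bootmem walk captures the same idea.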
include/linux/hugetlb.h
@@ -892,6 +892,20 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 }
 #endif
 
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+extern bool hugetlb_free_vmemmap_enabled;
+
+static inline bool is_hugetlb_free_vmemmap_enabled(void)
+{
+	return hugetlb_free_vmemmap_enabled;
+}
+#else
+static inline bool is_hugetlb_free_vmemmap_enabled(void)
+{
+	return false;
+}
+#endif
+
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
@@ -1046,6 +1060,11 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
 					pte_t *ptep, pte_t pte, unsigned long sz)
 {
 }
+
+static inline bool is_hugetlb_free_vmemmap_enabled(void)
+{
+	return false;
+}
 #endif	/* CONFIG_HUGETLB_PAGE */
 
 static inline spinlock_t *huge_pte_lock(struct hstate *h,
...
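
The header uses the stock kernel fallback pattern: when the option (or
CONFIG_HUGETLB_PAGE altogether) is compiled out,
is_hugetlb_free_vmemmap_enabled() becomes a static inline returning a
compile-time false, so call sites need no #ifdef and dead branches are
eliminated. A userspace mock of the idea (build with
-DCONFIG_HUGETLB_PAGE_FREE_VMEMMAP to flip it; everything here is
illustrative):

    #include <stdio.h>
    #include <stdbool.h>

    #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
    bool hugetlb_free_vmemmap_enabled;   /* set by the boot parameter */
    static inline bool is_hugetlb_free_vmemmap_enabled(void)
    {
        return hugetlb_free_vmemmap_enabled;
    }
    #else
    static inline bool is_hugetlb_free_vmemmap_enabled(void)
    {
        return false;   /* constant: the caller's branch folds away */
    }
    #endif

    int main(void)
    {
        if (is_hugetlb_free_vmemmap_enabled())   /* no #ifdef at the call site */
            puts("would free vmemmap pages");
        return 0;
    }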
mm/hugetlb_vmemmap.c
@@ -168,6 +168,8 @@
  * (last) level. So this type of HugeTLB page can be optimized only when its
  * size of the struct page structs is greater than 2 pages.
  */
+#define pr_fmt(fmt)	"HugeTLB: " fmt
+
 #include "hugetlb_vmemmap.h"
 
 /*
@@ -180,6 +182,28 @@
 #define RESERVE_VMEMMAP_NR		2U
 #define RESERVE_VMEMMAP_SIZE		(RESERVE_VMEMMAP_NR << PAGE_SHIFT)
 
+bool hugetlb_free_vmemmap_enabled;
+
+static int __init early_hugetlb_free_vmemmap_param(char *buf)
+{
+	/* We cannot optimize if a "struct page" crosses page boundaries. */
+	if (!is_power_of_2(sizeof(struct page))) {
+		pr_warn("cannot free vmemmap pages because \"struct page\" crosses page boundaries\n");
+		return 0;
+	}
+
+	if (!buf)
+		return -EINVAL;
+
+	if (!strcmp(buf, "on"))
+		hugetlb_free_vmemmap_enabled = true;
+	else if (strcmp(buf, "off"))
+		return -EINVAL;
+
+	return 0;
+}
+early_param("hugetlb_free_vmemmap", early_hugetlb_free_vmemmap_param);
+
 static inline unsigned long free_vmemmap_pages_size_per_hpage(struct hstate *h)
 {
 	return (unsigned long)free_vmemmap_pages_per_hpage(h) << PAGE_SHIFT;
...
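
Two details of the parser are easy to miss. early_param() handlers
return 0 to consume the parameter and non-zero to reject it, so a bare
"hugetlb_free_vmemmap" (NULL buf) or any value other than "on"/"off"
yields -EINVAL, while the power-of-2 check warns and returns 0, quietly
leaving the feature off. As for the guard itself: only a power-of-2
sizeof(struct page) divides PAGE_SIZE evenly, so no struct page
straddles two vmemmap pages and whole pages can be remapped safely. A
minimal standalone sketch of that check (the helper name is made up):

    #include <stdio.h>
    #include <stdbool.h>

    /* Stand-in for the kernel's is_power_of_2(). */
    static bool is_pow2(unsigned long n)
    {
        return n != 0 && (n & (n - 1)) == 0;
    }

    int main(void)
    {
        /* 64-byte struct page: 4096 % 64 == 0, never crosses a boundary.
         * A hypothetical 56-byte layout would straddle one regularly. */
        printf("64: %d, 56: %d\n", is_pow2(64), is_pow2(56));
        return 0;
    }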