Commit 0e9df1c9 authored by Ryan Roberts, committed by Will Deacon

arm64: mm: Don't remap pgtables for allocate vs populate

During linear map pgtable creation, each pgtable is fixmapped /
fixunmapped twice; once during allocation to zero the memory, and
again during population to write the entries. This means each table has
2 TLB invalidations issued against it. Let's fix this so that each table
is only fixmapped/fixunmapped once, halving the number of TLBIs, and
improving performance.

Achieve this by separating allocation and initialization (zeroing) of
the page. The allocated page is now fixmapped directly by the walker and
initialized, before being populated and finally fixunmapped.

This approach keeps the change small, but has the side effect that late
allocations (using __get_free_page()) must also go through the generic
memory clearing routine. So let's tell __get_free_page() not to zero the
memory to avoid duplication.

Additionally this approach means that fixmap/fixunmap is still used for
late pgtable modifications. That's not technically needed since the
memory is all mapped in the linear map by that point. That's left as a
possible future optimization if found to be needed.

Execution time of map_mem(), which creates the kernel linear map page
tables, was measured on different machines with different RAM configs:

               | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
               | VM, 16G     | VM, 64G     | VM, 256G    | Metal, 512G
---------------|-------------|-------------|-------------|-------------
               |   ms    (%) |   ms    (%) |   ms    (%) |    ms    (%)
---------------|-------------|-------------|-------------|-------------
before         |   11   (0%) |  161   (0%) |  656   (0%) |  1654   (0%)
after          |   10 (-11%) |  104 (-35%) |  438 (-33%) |  1223 (-26%)
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Itaru Kitayama <itaru.kitayama@fujitsu.com>
Tested-by: Eric Chanudet <echanude@redhat.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20240412131908.433043-4-ryan.roberts@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
parent 1fcb7cea
...@@ -1010,6 +1010,8 @@ static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr) ...@@ -1010,6 +1010,8 @@ static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
static inline bool pgtable_l5_enabled(void) { return false; } static inline bool pgtable_l5_enabled(void) { return false; }
#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */ /* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
#define p4d_set_fixmap(addr) NULL #define p4d_set_fixmap(addr) NULL
#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp) #define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp)
......
...@@ -109,28 +109,12 @@ EXPORT_SYMBOL(phys_mem_access_prot); ...@@ -109,28 +109,12 @@ EXPORT_SYMBOL(phys_mem_access_prot);
static phys_addr_t __init early_pgtable_alloc(int shift) static phys_addr_t __init early_pgtable_alloc(int shift)
{ {
phys_addr_t phys; phys_addr_t phys;
void *ptr;
phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0,
MEMBLOCK_ALLOC_NOLEAKTRACE); MEMBLOCK_ALLOC_NOLEAKTRACE);
if (!phys) if (!phys)
panic("Failed to allocate page table page\n"); panic("Failed to allocate page table page\n");
/*
* The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
* slot will be free, so we can (ab)use the FIX_PTE slot to initialise
* any level of table.
*/
ptr = pte_set_fixmap(phys);
memset(ptr, 0, PAGE_SIZE);
/*
* Implicit barriers also ensure the zeroed page is visible to the page
* table walker
*/
pte_clear_fixmap();
return phys; return phys;
} }
...@@ -172,6 +156,14 @@ bool pgattr_change_is_safe(u64 old, u64 new) ...@@ -172,6 +156,14 @@ bool pgattr_change_is_safe(u64 old, u64 new)
return ((old ^ new) & ~mask) == 0; return ((old ^ new) & ~mask) == 0;
} }
static void init_clear_pgtable(void *table)
{
clear_page(table);
/* Ensure the zeroing is observed by page table walks. */
dsb(ishst);
}
static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot) phys_addr_t phys, pgprot_t prot)
{ {
...@@ -214,12 +206,15 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, ...@@ -214,12 +206,15 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
pmdval |= PMD_TABLE_PXN; pmdval |= PMD_TABLE_PXN;
BUG_ON(!pgtable_alloc); BUG_ON(!pgtable_alloc);
pte_phys = pgtable_alloc(PAGE_SHIFT); pte_phys = pgtable_alloc(PAGE_SHIFT);
ptep = pte_set_fixmap(pte_phys);
init_clear_pgtable(ptep);
ptep += pte_index(addr);
__pmd_populate(pmdp, pte_phys, pmdval); __pmd_populate(pmdp, pte_phys, pmdval);
pmd = READ_ONCE(*pmdp); } else {
BUG_ON(pmd_bad(pmd));
ptep = pte_set_fixmap_offset(pmdp, addr);
} }
BUG_ON(pmd_bad(pmd));
ptep = pte_set_fixmap_offset(pmdp, addr);
do { do {
pgprot_t __prot = prot; pgprot_t __prot = prot;
...@@ -298,12 +293,15 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, ...@@ -298,12 +293,15 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
pudval |= PUD_TABLE_PXN; pudval |= PUD_TABLE_PXN;
BUG_ON(!pgtable_alloc); BUG_ON(!pgtable_alloc);
pmd_phys = pgtable_alloc(PMD_SHIFT); pmd_phys = pgtable_alloc(PMD_SHIFT);
pmdp = pmd_set_fixmap(pmd_phys);
init_clear_pgtable(pmdp);
pmdp += pmd_index(addr);
__pud_populate(pudp, pmd_phys, pudval); __pud_populate(pudp, pmd_phys, pudval);
pud = READ_ONCE(*pudp); } else {
BUG_ON(pud_bad(pud));
pmdp = pmd_set_fixmap_offset(pudp, addr);
} }
BUG_ON(pud_bad(pud));
pmdp = pmd_set_fixmap_offset(pudp, addr);
do { do {
pgprot_t __prot = prot; pgprot_t __prot = prot;
...@@ -340,12 +338,15 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, ...@@ -340,12 +338,15 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
p4dval |= P4D_TABLE_PXN; p4dval |= P4D_TABLE_PXN;
BUG_ON(!pgtable_alloc); BUG_ON(!pgtable_alloc);
pud_phys = pgtable_alloc(PUD_SHIFT); pud_phys = pgtable_alloc(PUD_SHIFT);
pudp = pud_set_fixmap(pud_phys);
init_clear_pgtable(pudp);
pudp += pud_index(addr);
__p4d_populate(p4dp, pud_phys, p4dval); __p4d_populate(p4dp, pud_phys, p4dval);
p4d = READ_ONCE(*p4dp); } else {
BUG_ON(p4d_bad(p4d));
pudp = pud_set_fixmap_offset(p4dp, addr);
} }
BUG_ON(p4d_bad(p4d));
pudp = pud_set_fixmap_offset(p4dp, addr);
do { do {
pud_t old_pud = READ_ONCE(*pudp); pud_t old_pud = READ_ONCE(*pudp);
...@@ -395,12 +396,15 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, ...@@ -395,12 +396,15 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
pgdval |= PGD_TABLE_PXN; pgdval |= PGD_TABLE_PXN;
BUG_ON(!pgtable_alloc); BUG_ON(!pgtable_alloc);
p4d_phys = pgtable_alloc(P4D_SHIFT); p4d_phys = pgtable_alloc(P4D_SHIFT);
p4dp = p4d_set_fixmap(p4d_phys);
init_clear_pgtable(p4dp);
p4dp += p4d_index(addr);
__pgd_populate(pgdp, p4d_phys, pgdval); __pgd_populate(pgdp, p4d_phys, pgdval);
pgd = READ_ONCE(*pgdp); } else {
BUG_ON(pgd_bad(pgd));
p4dp = p4d_set_fixmap_offset(pgdp, addr);
} }
BUG_ON(pgd_bad(pgd));
p4dp = p4d_set_fixmap_offset(pgdp, addr);
do { do {
p4d_t old_p4d = READ_ONCE(*p4dp); p4d_t old_p4d = READ_ONCE(*p4dp);
...@@ -467,11 +471,10 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, ...@@ -467,11 +471,10 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
static phys_addr_t __pgd_pgtable_alloc(int shift) static phys_addr_t __pgd_pgtable_alloc(int shift)
{ {
void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
BUG_ON(!ptr); void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO);
/* Ensure the zeroed page is visible to the page table walker */ BUG_ON(!ptr);
dsb(ishst);
return __pa(ptr); return __pa(ptr);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment