Commit d3035be4 authored by Pavel Tatashin's avatar Pavel Tatashin Committed by Linus Torvalds

mm: calculate deferred pages after skipping mirrored memory

update_defer_init() should be called only when struct page is about to be
initialized. Because it counts number of initialized struct pages, but
there we may skip struct pages if there is some mirrored memory.

So move, update_defer_init() after checking for mirrored memory.

Also, rename update_defer_init() to defer_init() and reverse the return
boolean to emphasize that this is a boolean function, that tells that the
reset of memmap initialization should be deferred.

Make this function self-contained: do not pass number of already
initialized pages in this zone by using static counters.

I found this bug by reading the code.  The effect is that fewer than
expected struct pages are initialized early in boot, and it is possible
that in some corner cases we may fail to boot when mirrored pages are
used.  The deferred on demand code should somewhat mitigate this.  But
this still brings some inconsistencies compared to when booting without
mirrored pages, so it is better to fix.

[pasha.tatashin@oracle.com: add comment about defer_init's lack of locking]
  Link: http://lkml.kernel.org/r/20180726193509.3326-3-pasha.tatashin@oracle.com
[akpm@linux-foundation.org: make defer_init non-inline, __meminit]
Link: http://lkml.kernel.org/r/20180724235520.10200-3-pasha.tatashin@oracle.comSigned-off-by: default avatarPavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: default avatarOscar Salvador <osalvador@suse.de>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent dfb3ccd0
...@@ -307,24 +307,33 @@ static inline bool __meminit early_page_uninitialised(unsigned long pfn) ...@@ -307,24 +307,33 @@ static inline bool __meminit early_page_uninitialised(unsigned long pfn)
} }
/* /*
* Returns false when the remaining initialisation should be deferred until * Returns true when the remaining initialisation should be deferred until
* later in the boot cycle when it can be parallelised. * later in the boot cycle when it can be parallelised.
*/ */
static inline bool update_defer_init(pg_data_t *pgdat, static bool __meminit
unsigned long pfn, unsigned long zone_end, defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
unsigned long *nr_initialised)
{ {
static unsigned long prev_end_pfn, nr_initialised;
/*
* prev_end_pfn static that contains the end of previous zone
* No need to protect because called very early in boot before smp_init.
*/
if (prev_end_pfn != end_pfn) {
prev_end_pfn = end_pfn;
nr_initialised = 0;
}
/* Always populate low zones for address-constrained allocations */ /* Always populate low zones for address-constrained allocations */
if (zone_end < pgdat_end_pfn(pgdat)) if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
return true;
(*nr_initialised)++;
if ((*nr_initialised > pgdat->static_init_pgcnt) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
pgdat->first_deferred_pfn = pfn;
return false; return false;
nr_initialised++;
if ((nr_initialised > NODE_DATA(nid)->static_init_pgcnt) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
NODE_DATA(nid)->first_deferred_pfn = pfn;
return true;
} }
return false;
return true;
} }
#else #else
static inline bool early_page_uninitialised(unsigned long pfn) static inline bool early_page_uninitialised(unsigned long pfn)
...@@ -332,11 +341,9 @@ static inline bool early_page_uninitialised(unsigned long pfn) ...@@ -332,11 +341,9 @@ static inline bool early_page_uninitialised(unsigned long pfn)
return false; return false;
} }
static inline bool update_defer_init(pg_data_t *pgdat, static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
unsigned long pfn, unsigned long zone_end,
unsigned long *nr_initialised)
{ {
return true; return false;
} }
#endif #endif
...@@ -5453,9 +5460,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, ...@@ -5453,9 +5460,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
struct vmem_altmap *altmap) struct vmem_altmap *altmap)
{ {
unsigned long end_pfn = start_pfn + size; unsigned long end_pfn = start_pfn + size;
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long pfn; unsigned long pfn;
unsigned long nr_initialised = 0;
struct page *page; struct page *page;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
struct memblock_region *r = NULL, *tmp; struct memblock_region *r = NULL, *tmp;
...@@ -5494,8 +5499,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, ...@@ -5494,8 +5499,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
continue; continue;
if (!early_pfn_in_nid(pfn, nid)) if (!early_pfn_in_nid(pfn, nid))
continue; continue;
if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
break;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/* /*
...@@ -5518,6 +5521,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, ...@@ -5518,6 +5521,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
} }
} }
#endif #endif
if (defer_init(nid, pfn, end_pfn))
break;
not_early: not_early:
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment