Commit 1a367063 authored by Nathan Fontenot's avatar Nathan Fontenot Committed by Michael Ellerman

powerpc/pseries: Check memory device state before onlining/offlining

When DLPAR adding or removing memory we need to check the device
offline status before trying to online/offline the memory. This is
needed because calls to device_online() and device_offline() will
return non-zero for memory that is already online and offline
respectively.

This update resolves two scenarios. First, for a kernel built with
auto-online memory enabled (CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y),
memory will be onlined as part of calls to add_memory(). After adding
the memory the pseries DLPAR code tries to online it and fails since
the memory is already online. The DLPAR code then tries to remove the
memory which produces the oops message below because the memory is not
offline.

The second scenario occurs when removing memory that is already
offline, i.e. marking memory offline (via sysfs) and then trying to
remove that memory. This doesn't work because offlining the already
offline memory does not succeed and the DLPAR code then fails the
DLPAR remove operation.

The fix for both scenarios is to check the device.offline status
before making the calls to device_online() or device_offline().

  kernel BUG at mm/memory_hotplug.c:1936!
  ...
  NIP [c0000000002ca428] .remove_memory+0xb8/0xc0
  LR [c0000000002ca3cc] .remove_memory+0x5c/0xc0
  Call Trace:
    .remove_memory+0x5c/0xc0 (unreliable)
    .dlpar_add_lmb+0x384/0x400
    .dlpar_memory+0x5dc/0xca0
    .handle_dlpar_errorlog+0x74/0xe0
    .pseries_hp_work_fn+0x2c/0x90
    .process_one_work+0x17c/0x460
    .worker_thread+0x88/0x500
    .kthread+0x15c/0x1a0
    .ret_from_kernel_thread+0x58/0xc0

Fixes: 943db62c ("powerpc/pseries: Revert 'Auto-online hotplugged memory'")
Signed-off-by: default avatarNathan Fontenot <nfont@linux.vnet.ibm.com>
[mpe: Use bool, add explicit rc=0 case, change log typos & formatting]
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
parent 8a583c0a
......@@ -336,7 +336,38 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
return mem_block;
}
static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
{
struct memory_block *mem_block;
int rc;
mem_block = lmb_to_memblock(lmb);
if (!mem_block)
return -EINVAL;
if (online && mem_block->dev.offline)
rc = device_online(&mem_block->dev);
else if (!online && !mem_block->dev.offline)
rc = device_offline(&mem_block->dev);
else
rc = 0;
put_device(&mem_block->dev);
return rc;
}
static int dlpar_online_lmb(struct of_drconf_cell *lmb)
{
return dlpar_change_lmb_state(lmb, true);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
static int dlpar_offline_lmb(struct of_drconf_cell *lmb)
{
return dlpar_change_lmb_state(lmb, false);
}
static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
{
unsigned long block_sz, start_pfn;
......@@ -431,19 +462,13 @@ static int dlpar_add_lmb(struct of_drconf_cell *);
static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
{
struct memory_block *mem_block;
unsigned long block_sz;
int nid, rc;
if (!lmb_is_removable(lmb))
return -EINVAL;
mem_block = lmb_to_memblock(lmb);
if (!mem_block)
return -EINVAL;
rc = device_offline(&mem_block->dev);
put_device(&mem_block->dev);
rc = dlpar_offline_lmb(lmb);
if (rc)
return rc;
......@@ -737,20 +762,6 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
static int dlpar_online_lmb(struct of_drconf_cell *lmb)
{
struct memory_block *mem_block;
int rc;
mem_block = lmb_to_memblock(lmb);
if (!mem_block)
return -EINVAL;
rc = device_online(&mem_block->dev);
put_device(&mem_block->dev);
return rc;
}
static int dlpar_add_lmb(struct of_drconf_cell *lmb)
{
unsigned long block_sz;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment