Commit 30cff8ab authored by Jane Chu, committed by Greg Kroah-Hartman

mm/memory-failure: poison read receives SIGKILL instead of SIGBUS if mmaped more than once

commit 3d7fed4a upstream.

Mmap /dev/dax more than once, then read the poison location using an
address from one of the mappings.  The other mappings, because they do
not have the page mapped in, will cause SIGKILLs to be delivered to the
process.  SIGKILL takes precedence over SIGBUS, so the user process loses
the opportunity to handle the UE.

Although one may add MAP_POPULATE to mmap(2) to work around the issue,
MAP_POPULATE makes mapping 128GB of pmem several orders of magnitude
slower, so it isn't always an option.

Details -

  ndctl inject-error --block=10 --count=1 namespace6.0

  ./read_poison -x dax6.0 -o 5120 -m 2
  mmaped address 0x7f5bb6600000
  mmaped address 0x7f3cf3600000
  doing local read at address 0x7f3cf3601400
  Killed

Console messages in instrumented kernel -

  mce: Uncorrected hardware memory error in user-access at edbe201400
  Memory failure: tk->addr = 7f5bb6601000
  Memory failure: address edbe201: call dev_pagemap_mapping_shift
  dev_pagemap_mapping_shift: page edbe201: no PUD
  Memory failure: tk->size_shift == 0
  Memory failure: Unable to find user space address edbe201 in read_poison
  Memory failure: tk->addr = 7f3cf3601000
  Memory failure: address edbe201: call dev_pagemap_mapping_shift
  Memory failure: tk->size_shift = 21
  Memory failure: 0xedbe201: forcibly killing read_poison:22434 because of failure to unmap corrupted page
    => to deliver SIGKILL
  Memory failure: 0xedbe201: Killing read_poison:22434 due to hardware memory corruption
    => to deliver SIGBUS

Link: http://lkml.kernel.org/r/1565112345-28754-3-git-send-email-jane.chu@oracle.com
Signed-off-by: Jane Chu <jane.chu@oracle.com>
Suggested-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 91eec769
...@@ -202,7 +202,6 @@ struct to_kill { ...@@ -202,7 +202,6 @@ struct to_kill {
struct task_struct *tsk; struct task_struct *tsk;
unsigned long addr; unsigned long addr;
short size_shift; short size_shift;
char addr_valid;
}; };
/* /*
...@@ -327,22 +326,27 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, ...@@ -327,22 +326,27 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
} }
} }
tk->addr = page_address_in_vma(p, vma); tk->addr = page_address_in_vma(p, vma);
tk->addr_valid = 1;
if (is_zone_device_page(p)) if (is_zone_device_page(p))
tk->size_shift = dev_pagemap_mapping_shift(p, vma); tk->size_shift = dev_pagemap_mapping_shift(p, vma);
else else
tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT; tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
/* /*
* In theory we don't have to kill when the page was * Send SIGKILL if "tk->addr == -EFAULT". Also, as
* munmaped. But it could be also a mremap. Since that's * "tk->size_shift" is always non-zero for !is_zone_device_page(),
* likely very rare kill anyways just out of paranoia, but use * so "tk->size_shift == 0" effectively checks no mapping on
* a SIGKILL because the error is not contained anymore. * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
*/ * to a process' address space, it's possible not all N VMAs
if (tk->addr == -EFAULT || tk->size_shift == 0) { * contain mappings for the page, but at least one VMA does.
* Only deliver SIGBUS with payload derived from the VMA that
* has a mapping for the page.
*/
if (tk->addr == -EFAULT) {
pr_info("Memory failure: Unable to find user space address %lx in %s\n", pr_info("Memory failure: Unable to find user space address %lx in %s\n",
page_to_pfn(p), tsk->comm); page_to_pfn(p), tsk->comm);
tk->addr_valid = 0; } else if (tk->size_shift == 0) {
kfree(tk);
return;
} }
get_task_struct(tsk); get_task_struct(tsk);
tk->tsk = tsk; tk->tsk = tsk;
...@@ -369,7 +373,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail, ...@@ -369,7 +373,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* make sure the process doesn't catch the * make sure the process doesn't catch the
* signal and then access the memory. Just kill it. * signal and then access the memory. Just kill it.
*/ */
if (fail || tk->addr_valid == 0) { if (fail || tk->addr == -EFAULT) {
pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
pfn, tk->tsk->comm, tk->tsk->pid); pfn, tk->tsk->comm, tk->tsk->pid);
do_send_sig_info(SIGKILL, SEND_SIG_PRIV, do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment