Commit 1ea7ca1b authored by Jane Chu, committed by Vishal Verma

dax: enable dax fault handler to report VM_FAULT_HWPOISON

When multiple processes mmap() a dax file and one of them issues a
'load' that consumes a hwpoison, that process receives a SIGBUS with
si_code = BUS_MCEERR_AR and with si_lsb set for the poison scope.
Soon after, when any other process issues a 'load' to the poisoned
page (which has been unmapped from the kernel side by memory_failure),
it receives a SIGBUS with si_code = BUS_ADRERR and without a valid
si_lsb.

This is confusing to users and differs from a page fault due to
poison in RAM; some helpful information is also lost.

Channel the dax backend driver's poison detection to the filesystem
so that, instead of reporting VM_FAULT_SIGBUS, it can report
VM_FAULT_HWPOISON.

If user level block IO syscalls fail due to poison, the errno will
be converted to EIO to maintain block API consistency.
Signed-off-by: Jane Chu <jane.chu@oracle.com>
Link: https://lore.kernel.org/r/20230615181325.1327259-2-jane.chu@oracle.com
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
parent 95bf6df0
...@@ -203,6 +203,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, ...@@ -203,6 +203,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages) size_t nr_pages)
{ {
int ret;
if (!dax_alive(dax_dev)) if (!dax_alive(dax_dev))
return -ENXIO; return -ENXIO;
/* /*
...@@ -213,7 +215,8 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, ...@@ -213,7 +215,8 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
if (nr_pages != 1) if (nr_pages != 1)
return -EIO; return -EIO;
return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages); ret = dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
return dax_mem2blk_err(ret);
} }
EXPORT_SYMBOL_GPL(dax_zero_page_range); EXPORT_SYMBOL_GPL(dax_zero_page_range);
......
...@@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, ...@@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
long actual_nr; long actual_nr;
if (mode != DAX_RECOVERY_WRITE) if (mode != DAX_RECOVERY_WRITE)
return -EIO; return -EHWPOISON;
/* /*
* Set the recovery stride is set to kernel page size because * Set the recovery stride is set to kernel page size because
......
...@@ -54,7 +54,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, ...@@ -54,7 +54,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
&kaddr, NULL); &kaddr, NULL);
if (rc < 0) if (rc < 0)
return rc; return dax_mem2blk_err(rc);
memset(kaddr, 0, nr_pages << PAGE_SHIFT); memset(kaddr, 0, nr_pages << PAGE_SHIFT);
dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
return 0; return 0;
......
...@@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size, ...@@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size,
if (!zero_edge) { if (!zero_edge) {
ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL); ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
if (ret) if (ret)
return ret; return dax_mem2blk_err(ret);
} }
if (copy_all) { if (copy_all) {
...@@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter) ...@@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
out_unlock: out_unlock:
dax_read_unlock(id); dax_read_unlock(id);
return ret; return dax_mem2blk_err(ret);
} }
int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len, int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
...@@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size) ...@@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, &kaddr, ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, &kaddr,
NULL); NULL);
if (ret < 0) if (ret < 0)
return ret; return dax_mem2blk_err(ret);
memset(kaddr + offset, 0, size); memset(kaddr + offset, 0, size);
if (iomap->flags & IOMAP_F_SHARED) if (iomap->flags & IOMAP_F_SHARED)
ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap, ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
...@@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, ...@@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
DAX_ACCESS, &kaddr, NULL); DAX_ACCESS, &kaddr, NULL);
if (map_len == -EIO && iov_iter_rw(iter) == WRITE) { if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) {
map_len = dax_direct_access(dax_dev, pgoff, map_len = dax_direct_access(dax_dev, pgoff,
PHYS_PFN(size), DAX_RECOVERY_WRITE, PHYS_PFN(size), DAX_RECOVERY_WRITE,
&kaddr, NULL); &kaddr, NULL);
...@@ -1506,7 +1507,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, ...@@ -1506,7 +1507,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
recovery = true; recovery = true;
} }
if (map_len < 0) { if (map_len < 0) {
ret = map_len; ret = dax_mem2blk_err(map_len);
break; break;
} }
......
...@@ -775,7 +775,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev, ...@@ -775,7 +775,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
NULL); NULL);
if (rc < 0) if (rc < 0)
return rc; return dax_mem2blk_err(rc);
memset(kaddr, 0, nr_pages << PAGE_SHIFT); memset(kaddr, 0, nr_pages << PAGE_SHIFT);
dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
return 0; return 0;
......
...@@ -261,6 +261,19 @@ static inline bool dax_mapping(struct address_space *mapping) ...@@ -261,6 +261,19 @@ static inline bool dax_mapping(struct address_space *mapping)
return mapping->host && IS_DAX(mapping->host); return mapping->host && IS_DAX(mapping->host);
} }
/*
 * DAX has both a memory personality and a block-device personality, and
 * the way hwpoison is reported depends on which one is presently visible.
 *
 * When dax acts like a block device (e.g. block IO), hwpoison is reported
 * as -EIO.  When dax acts like memory (e.g. page fault), hwpoison is
 * reported as -EHWPOISON, which leads to VM_FAULT_HWPOISON.
 *
 * dax_mem2blk_err() translates a memory-personality error code into its
 * block-personality equivalent: -EHWPOISON becomes -EIO, and every other
 * value is handed back unchanged.
 */
static inline int dax_mem2blk_err(int err)
{
	if (err != -EHWPOISON)
		return err;

	return -EIO;
}
#ifdef CONFIG_DEV_DAX_HMEM_DEVICES #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
void hmem_register_resource(int target_nid, struct resource *r); void hmem_register_resource(int target_nid, struct resource *r);
#else #else
......
...@@ -3342,6 +3342,8 @@ static inline vm_fault_t vmf_error(int err) ...@@ -3342,6 +3342,8 @@ static inline vm_fault_t vmf_error(int err)
{ {
if (err == -ENOMEM) if (err == -ENOMEM)
return VM_FAULT_OOM; return VM_FAULT_OOM;
else if (err == -EHWPOISON)
return VM_FAULT_HWPOISON;
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment