Commit a3841f94 authored by Linus Torvalds

Merge tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm and dax updates from Dan Williams:
 "Save for a few late fixes, all of these commits have shipped in -next
  releases since before the merge window opened, and 0day has given a
  build success notification.

  The ext4 touches came from Jan, and the xfs touches have Darrick's
  reviewed-by. An xfstest for the MAP_SYNC feature has been through
   a few rounds of review and is on track to be merged.

   - Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable
     'userspace flush' of persistent memory updates via filesystem-dax
     mappings. It arranges for any filesystem metadata updates that may
     be required to satisfy a write fault to also be flushed ("on disk")
     before the kernel returns to userspace from the fault handler.
     Effectively every write-fault that dirties metadata completes an
     fsync() before returning from the fault handler. The new
     MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag
     is validated as supported by the filesystem's ->mmap() file
     operation (a short usage sketch follows after this message).

   - Add support for the standard ACPI 6.2 label access methods that
     replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods.
     This enables interoperability with environments that only implement
     the standardized methods.

   - Add support for the ACPI 6.2 NVDIMM media error injection methods.

   - Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for
     latch last shutdown status, firmware update, SMART error injection,
     and SMART alarm threshold control.

   - Cleanup physical address information disclosures to be root-only.

   - Fix revalidation of the DIMM "locked label area" status to support
     dynamic unlock of the label area.

   - Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA
     (system-physical-address) command and error injection commands.

  Acknowledgements that came after the commits were pushed to -next:

   - 957ac8c4 ("dax: fix PMD faults on zero-length files"):
        Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>

   - a39e596b ("xfs: support for synchronous DAX faults") and
     7b565c9f ("xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()")
        Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>"
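
For readers new to the interface, here is a minimal userspace sketch (not part
of the pull itself) of requesting a synchronous DAX mapping with the new flags.
The file path is hypothetical, and the fallback MAP_SYNC value is an assumption
taken from the asm-generic uapi header; real code should rely on the system's
<sys/mman.h> and linux uapi headers rather than these local definitions.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE	0x03	/* matches the uapi additions below */
#endif
#ifndef MAP_SYNC
#define MAP_SYNC	0x080000	/* assumed asm-generic value; check your headers */
#endif

int main(void)
{
	/* hypothetical file on a DAX-capable (e.g. ext4/xfs -o dax) filesystem */
	int fd = open("/mnt/pmem/data", O_RDWR | O_CREAT, 0644);

	if (fd < 0 || ftruncate(fd, 4096) < 0) {
		perror("open/ftruncate");
		return 1;
	}

	/*
	 * MAP_SHARED_VALIDATE makes the kernel reject mmap flags it does not
	 * understand, so MAP_SYNC is either honored or mmap() fails (e.g.
	 * EOPNOTSUPP on a non-DAX file) instead of being silently ignored,
	 * as an unknown flag would be with plain MAP_SHARED.
	 */
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * The write fault triggered below returns only after any filesystem
	 * metadata needed to reach this block is durable; userspace still
	 * owes a flush of the written cachelines (CLWB or non-temporal
	 * stores) to persist the data itself.
	 */
	strcpy(p, "hello, MAP_SYNC");

	munmap(p, 4096);
	close(fd);
	return 0;
}

Once such a mapping is established, the new "sf" entry in the VmFlags line of
/proc/<pid>/smaps (added later in this series) can be used to confirm that
VM_SYNC is set on the VMA.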

* tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (49 commits)
  acpi, nfit: add 'Enable Latch System Shutdown Status' command support
  dax: fix general protection fault in dax_alloc_inode
  dax: fix PMD faults on zero-length files
  dax: stop requiring a live device for dax_flush()
  brd: remove dax support
  dax: quiet bdev_dax_supported()
  fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core
  tools/testing/nvdimm: unit test clear-error commands
  acpi, nfit: validate commands against the device type
  tools/testing/nvdimm: stricter bounds checking for error injection commands
  xfs: support for synchronous DAX faults
  xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()
  ext4: Support for synchronous DAX faults
  ext4: Simplify error handling in ext4_dax_huge_fault()
  dax: Implement dax_finish_sync_fault()
  dax, iomap: Add support for synchronous faults
  mm: Define MAP_SYNC and VM_SYNC flags
  dax: Allow tuning whether dax_insert_mapping_entry() dirties entry
  dax: Allow dax_iomap_fault() to return pfn
  dax: Fix comment describing dax_iomap_fault()
  ...
parents adeba81a 4247f24c
...@@ -4208,7 +4208,7 @@ L: linux-i2c@vger.kernel.org ...@@ -4208,7 +4208,7 @@ L: linux-i2c@vger.kernel.org
S: Maintained S: Maintained
F: drivers/i2c/busses/i2c-diolan-u2c.c F: drivers/i2c/busses/i2c-diolan-u2c.c
DIRECT ACCESS (DAX) FILESYSTEM DIRECT ACCESS (DAX)
M: Matthew Wilcox <mawilcox@microsoft.com> M: Matthew Wilcox <mawilcox@microsoft.com>
M: Ross Zwisler <ross.zwisler@linux.intel.com> M: Ross Zwisler <ross.zwisler@linux.intel.com>
L: linux-fsdevel@vger.kernel.org L: linux-fsdevel@vger.kernel.org
...@@ -4217,6 +4217,12 @@ F: fs/dax.c ...@@ -4217,6 +4217,12 @@ F: fs/dax.c
F: include/linux/dax.h F: include/linux/dax.h
F: include/trace/events/fs_dax.h F: include/trace/events/fs_dax.h
DEVICE DIRECT ACCESS (DAX)
M: Dan Williams <dan.j.williams@intel.com>
L: linux-nvdimm@lists.01.org
S: Supported
F: drivers/dax/
DIRECTORY NOTIFICATION (DNOTIFY) DIRECTORY NOTIFICATION (DNOTIFY)
M: Jan Kara <jack@suse.cz> M: Jan Kara <jack@suse.cz>
R: Amir Goldstein <amir73il@gmail.com> R: Amir Goldstein <amir73il@gmail.com>
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#define MAP_SHARED 0x01 /* Share changes */ #define MAP_SHARED 0x01 /* Share changes */
#define MAP_PRIVATE 0x02 /* Changes are private */ #define MAP_PRIVATE 0x02 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
#define MAP_TYPE 0x0f /* Mask for type of mapping (OSF/1 is _wrong_) */ #define MAP_TYPE 0x0f /* Mask for type of mapping (OSF/1 is _wrong_) */
#define MAP_FIXED 0x100 /* Interpret addr exactly */ #define MAP_FIXED 0x100 /* Interpret addr exactly */
#define MAP_ANONYMOUS 0x10 /* don't use a file */ #define MAP_ANONYMOUS 0x10 /* don't use a file */
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
*/ */
#define MAP_SHARED 0x001 /* Share changes */ #define MAP_SHARED 0x001 /* Share changes */
#define MAP_PRIVATE 0x002 /* Changes are private */ #define MAP_PRIVATE 0x002 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */
#define MAP_TYPE 0x00f /* Mask for type of mapping */ #define MAP_TYPE 0x00f /* Mask for type of mapping */
#define MAP_FIXED 0x010 /* Interpret addr exactly */ #define MAP_FIXED 0x010 /* Interpret addr exactly */
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#define MAP_SHARED 0x01 /* Share changes */ #define MAP_SHARED 0x01 /* Share changes */
#define MAP_PRIVATE 0x02 /* Changes are private */ #define MAP_PRIVATE 0x02 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
#define MAP_TYPE 0x03 /* Mask for type of mapping */ #define MAP_TYPE 0x03 /* Mask for type of mapping */
#define MAP_FIXED 0x04 /* Interpret addr exactly */ #define MAP_FIXED 0x04 /* Interpret addr exactly */
#define MAP_ANONYMOUS 0x10 /* don't use a file */ #define MAP_ANONYMOUS 0x10 /* don't use a file */
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
*/ */
#define MAP_SHARED 0x001 /* Share changes */ #define MAP_SHARED 0x001 /* Share changes */
#define MAP_PRIVATE 0x002 /* Changes are private */ #define MAP_PRIVATE 0x002 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */
#define MAP_TYPE 0x00f /* Mask for type of mapping */ #define MAP_TYPE 0x00f /* Mask for type of mapping */
#define MAP_FIXED 0x010 /* Interpret addr exactly */ #define MAP_FIXED 0x010 /* Interpret addr exactly */
......
This diff is collapsed.
...@@ -67,7 +67,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, ...@@ -67,7 +67,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
continue; continue;
/* If this fails due to an -ENOMEM, there is little we can do */ /* If this fails due to an -ENOMEM, there is little we can do */
nvdimm_bus_add_poison(acpi_desc->nvdimm_bus, nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
ALIGN(mce->addr, L1_CACHE_BYTES), ALIGN(mce->addr, L1_CACHE_BYTES),
L1_CACHE_BYTES); L1_CACHE_BYTES);
nvdimm_region_notify(nfit_spa->nd_region, nvdimm_region_notify(nfit_spa->nd_region,
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
/* ACPI 6.1 */ /* ACPI 6.1 */
#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba" #define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */ /* http://pmem.io/documents/NVDIMM_DSM_Interface-V1.6.pdf */
#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66" #define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
/* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */ /* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */
...@@ -38,6 +38,37 @@ ...@@ -38,6 +38,37 @@
| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED) | ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT
#define NVDIMM_STANDARD_CMDMASK \
(1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
| 1 << ND_CMD_GET_CONFIG_SIZE | 1 << ND_CMD_GET_CONFIG_DATA \
| 1 << ND_CMD_SET_CONFIG_DATA | 1 << ND_CMD_VENDOR_EFFECT_LOG_SIZE \
| 1 << ND_CMD_VENDOR_EFFECT_LOG | 1 << ND_CMD_VENDOR)
/*
* Command numbers that the kernel needs to know about to handle
* non-default DSM revision ids
*/
enum nvdimm_family_cmds {
NVDIMM_INTEL_LATCH_SHUTDOWN = 10,
NVDIMM_INTEL_GET_MODES = 11,
NVDIMM_INTEL_GET_FWINFO = 12,
NVDIMM_INTEL_START_FWUPDATE = 13,
NVDIMM_INTEL_SEND_FWUPDATE = 14,
NVDIMM_INTEL_FINISH_FWUPDATE = 15,
NVDIMM_INTEL_QUERY_FWUPDATE = 16,
NVDIMM_INTEL_SET_THRESHOLD = 17,
NVDIMM_INTEL_INJECT_ERROR = 18,
};
#define NVDIMM_INTEL_CMDMASK \
(NVDIMM_STANDARD_CMDMASK | 1 << NVDIMM_INTEL_GET_MODES \
| 1 << NVDIMM_INTEL_GET_FWINFO | 1 << NVDIMM_INTEL_START_FWUPDATE \
| 1 << NVDIMM_INTEL_SEND_FWUPDATE | 1 << NVDIMM_INTEL_FINISH_FWUPDATE \
| 1 << NVDIMM_INTEL_QUERY_FWUPDATE | 1 << NVDIMM_INTEL_SET_THRESHOLD \
| 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN)
enum nfit_uuids { enum nfit_uuids {
/* for simplicity alias the uuid index with the family id */ /* for simplicity alias the uuid index with the family id */
NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL, NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
...@@ -140,6 +171,9 @@ struct nfit_mem { ...@@ -140,6 +171,9 @@ struct nfit_mem {
struct resource *flush_wpq; struct resource *flush_wpq;
unsigned long dsm_mask; unsigned long dsm_mask;
int family; int family;
u32 has_lsi:1;
u32 has_lsr:1;
u32 has_lsw:1;
}; };
struct acpi_nfit_desc { struct acpi_nfit_desc {
...@@ -167,6 +201,7 @@ struct acpi_nfit_desc { ...@@ -167,6 +201,7 @@ struct acpi_nfit_desc {
unsigned int init_complete:1; unsigned int init_complete:1;
unsigned long dimm_cmd_force_en; unsigned long dimm_cmd_force_en;
unsigned long bus_cmd_force_en; unsigned long bus_cmd_force_en;
unsigned long bus_nfit_cmd_force_en;
int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
void *iobuf, u64 len, int rw); void *iobuf, u64 len, int rw);
}; };
......
...@@ -302,7 +302,6 @@ config BLK_DEV_SX8 ...@@ -302,7 +302,6 @@ config BLK_DEV_SX8
config BLK_DEV_RAM config BLK_DEV_RAM
tristate "RAM block device support" tristate "RAM block device support"
select DAX if BLK_DEV_RAM_DAX
---help--- ---help---
Saying Y here will allow you to use a portion of your RAM memory as Saying Y here will allow you to use a portion of your RAM memory as
a block device, so that you can make file systems on it, read and a block device, so that you can make file systems on it, read and
...@@ -338,17 +337,6 @@ config BLK_DEV_RAM_SIZE ...@@ -338,17 +337,6 @@ config BLK_DEV_RAM_SIZE
The default value is 4096 kilobytes. Only change this if you know The default value is 4096 kilobytes. Only change this if you know
what you are doing. what you are doing.
config BLK_DEV_RAM_DAX
bool "Support Direct Access (DAX) to RAM block devices"
depends on BLK_DEV_RAM && FS_DAX
default n
help
Support filesystems using DAX to access RAM block devices. This
avoids double-buffering data in the page cache before copying it
to the block device. Answering Y will slightly enlarge the kernel,
and will prevent RAM block device backing store memory from being
allocated from highmem (only a problem for highmem systems).
config CDROM_PKTCDVD config CDROM_PKTCDVD
tristate "Packet writing on CD/DVD media (DEPRECATED)" tristate "Packet writing on CD/DVD media (DEPRECATED)"
depends on !UML depends on !UML
......
...@@ -21,11 +21,6 @@ ...@@ -21,11 +21,6 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#ifdef CONFIG_BLK_DEV_RAM_DAX
#include <linux/pfn_t.h>
#include <linux/dax.h>
#include <linux/uio.h>
#endif
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -45,9 +40,6 @@ struct brd_device { ...@@ -45,9 +40,6 @@ struct brd_device {
struct request_queue *brd_queue; struct request_queue *brd_queue;
struct gendisk *brd_disk; struct gendisk *brd_disk;
#ifdef CONFIG_BLK_DEV_RAM_DAX
struct dax_device *dax_dev;
#endif
struct list_head brd_list; struct list_head brd_list;
/* /*
...@@ -112,9 +104,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) ...@@ -112,9 +104,6 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
* restriction might be able to be lifted. * restriction might be able to be lifted.
*/ */
gfp_flags = GFP_NOIO | __GFP_ZERO; gfp_flags = GFP_NOIO | __GFP_ZERO;
#ifndef CONFIG_BLK_DEV_RAM_DAX
gfp_flags |= __GFP_HIGHMEM;
#endif
page = alloc_page(gfp_flags); page = alloc_page(gfp_flags);
if (!page) if (!page)
return NULL; return NULL;
...@@ -334,43 +323,6 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, ...@@ -334,43 +323,6 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
return err; return err;
} }
#ifdef CONFIG_BLK_DEV_RAM_DAX
static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
{
struct page *page;
if (!brd)
return -ENODEV;
page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT);
if (!page)
return -ENOSPC;
*kaddr = page_address(page);
*pfn = page_to_pfn_t(page);
return 1;
}
static long brd_dax_direct_access(struct dax_device *dax_dev,
pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
{
struct brd_device *brd = dax_get_private(dax_dev);
return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn);
}
static size_t brd_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return copy_from_iter(addr, bytes, i);
}
static const struct dax_operations brd_dax_ops = {
.direct_access = brd_dax_direct_access,
.copy_from_iter = brd_dax_copy_from_iter,
};
#endif
static const struct block_device_operations brd_fops = { static const struct block_device_operations brd_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.rw_page = brd_rw_page, .rw_page = brd_rw_page,
...@@ -451,21 +403,8 @@ static struct brd_device *brd_alloc(int i) ...@@ -451,21 +403,8 @@ static struct brd_device *brd_alloc(int i)
set_capacity(disk, rd_size * 2); set_capacity(disk, rd_size * 2);
disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
#ifdef CONFIG_BLK_DEV_RAM_DAX
queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
brd->dax_dev = alloc_dax(brd, disk->disk_name, &brd_dax_ops);
if (!brd->dax_dev)
goto out_free_inode;
#endif
return brd; return brd;
#ifdef CONFIG_BLK_DEV_RAM_DAX
out_free_inode:
kill_dax(brd->dax_dev);
put_dax(brd->dax_dev);
#endif
out_free_queue: out_free_queue:
blk_cleanup_queue(brd->brd_queue); blk_cleanup_queue(brd->brd_queue);
out_free_dev: out_free_dev:
...@@ -505,10 +444,6 @@ static struct brd_device *brd_init_one(int i, bool *new) ...@@ -505,10 +444,6 @@ static struct brd_device *brd_init_one(int i, bool *new)
static void brd_del_one(struct brd_device *brd) static void brd_del_one(struct brd_device *brd)
{ {
list_del(&brd->brd_list); list_del(&brd->brd_list);
#ifdef CONFIG_BLK_DEV_RAM_DAX
kill_dax(brd->dax_dev);
put_dax(brd->dax_dev);
#endif
del_gendisk(brd->brd_disk); del_gendisk(brd->brd_disk);
brd_free(brd); brd_free(brd);
} }
......
...@@ -222,7 +222,8 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, ...@@ -222,7 +222,8 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size) unsigned long size)
{ {
struct resource *res; struct resource *res;
phys_addr_t phys; /* gcc-4.6.3-nolibc for i386 complains that this is uninitialized */
phys_addr_t uninitialized_var(phys);
int i; int i;
for (i = 0; i < dev_dax->num_resources; i++) { for (i = 0; i < dev_dax->num_resources; i++) {
......
...@@ -92,21 +92,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) ...@@ -92,21 +92,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
long len; long len;
if (blocksize != PAGE_SIZE) { if (blocksize != PAGE_SIZE) {
pr_err("VFS (%s): error: unsupported blocksize for dax\n", pr_debug("VFS (%s): error: unsupported blocksize for dax\n",
sb->s_id); sb->s_id);
return -EINVAL; return -EINVAL;
} }
err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
if (err) { if (err) {
pr_err("VFS (%s): error: unaligned partition for dax\n", pr_debug("VFS (%s): error: unaligned partition for dax\n",
sb->s_id); sb->s_id);
return err; return err;
} }
dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
if (!dax_dev) { if (!dax_dev) {
pr_err("VFS (%s): error: device does not support dax\n", pr_debug("VFS (%s): error: device does not support dax\n",
sb->s_id); sb->s_id);
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
...@@ -118,7 +118,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) ...@@ -118,7 +118,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
put_dax(dax_dev); put_dax(dax_dev);
if (len < 1) { if (len < 1) {
pr_err("VFS (%s): error: dax access failed (%ld)", pr_debug("VFS (%s): error: dax access failed (%ld)\n",
sb->s_id, len); sb->s_id, len);
return len < 0 ? len : -EIO; return len < 0 ? len : -EIO;
} }
...@@ -273,9 +273,6 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter); ...@@ -273,9 +273,6 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter);
void arch_wb_cache_pmem(void *addr, size_t size); void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{ {
if (unlikely(!dax_alive(dax_dev)))
return;
if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))) if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)))
return; return;
...@@ -344,6 +341,9 @@ static struct inode *dax_alloc_inode(struct super_block *sb) ...@@ -344,6 +341,9 @@ static struct inode *dax_alloc_inode(struct super_block *sb)
struct inode *inode; struct inode *inode;
dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL); dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
if (!dax_dev)
return NULL;
inode = &dax_dev->inode; inode = &dax_dev->inode;
inode->i_rdev = 0; inode->i_rdev = 0;
return inode; return inode;
......
...@@ -21,6 +21,7 @@ libnvdimm-y += region_devs.o ...@@ -21,6 +21,7 @@ libnvdimm-y += region_devs.o
libnvdimm-y += region.o libnvdimm-y += region.o
libnvdimm-y += namespace_devs.o libnvdimm-y += namespace_devs.o
libnvdimm-y += label.o libnvdimm-y += label.o
libnvdimm-y += badrange.o
libnvdimm-$(CONFIG_ND_CLAIM) += claim.o libnvdimm-$(CONFIG_ND_CLAIM) += claim.o
libnvdimm-$(CONFIG_BTT) += btt_devs.o libnvdimm-$(CONFIG_BTT) += btt_devs.o
libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
......
/*
* Copyright(c) 2017 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/libnvdimm.h>
#include <linux/badblocks.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/device.h>
#include <linux/ctype.h>
#include <linux/ndctl.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/io.h>
#include "nd-core.h"
#include "nd.h"
void badrange_init(struct badrange *badrange)
{
INIT_LIST_HEAD(&badrange->list);
spin_lock_init(&badrange->lock);
}
EXPORT_SYMBOL_GPL(badrange_init);
static void append_badrange_entry(struct badrange *badrange,
struct badrange_entry *bre, u64 addr, u64 length)
{
lockdep_assert_held(&badrange->lock);
bre->start = addr;
bre->length = length;
list_add_tail(&bre->list, &badrange->list);
}
static int alloc_and_append_badrange_entry(struct badrange *badrange,
u64 addr, u64 length, gfp_t flags)
{
struct badrange_entry *bre;
bre = kzalloc(sizeof(*bre), flags);
if (!bre)
return -ENOMEM;
append_badrange_entry(badrange, bre, addr, length);
return 0;
}
static int add_badrange(struct badrange *badrange, u64 addr, u64 length)
{
struct badrange_entry *bre, *bre_new;
spin_unlock(&badrange->lock);
bre_new = kzalloc(sizeof(*bre_new), GFP_KERNEL);
spin_lock(&badrange->lock);
if (list_empty(&badrange->list)) {
if (!bre_new)
return -ENOMEM;
append_badrange_entry(badrange, bre_new, addr, length);
return 0;
}
/*
* There is a chance this is a duplicate, check for those first.
* This will be the common case as ARS_STATUS returns all known
* errors in the SPA space, and we can't query it per region
*/
list_for_each_entry(bre, &badrange->list, list)
if (bre->start == addr) {
/* If length has changed, update this list entry */
if (bre->length != length)
bre->length = length;
kfree(bre_new);
return 0;
}
/*
* If not a duplicate or a simple length update, add the entry as is,
* as any overlapping ranges will get resolved when the list is consumed
* and converted to badblocks
*/
if (!bre_new)
return -ENOMEM;
append_badrange_entry(badrange, bre_new, addr, length);
return 0;
}
int badrange_add(struct badrange *badrange, u64 addr, u64 length)
{
int rc;
spin_lock(&badrange->lock);
rc = add_badrange(badrange, addr, length);
spin_unlock(&badrange->lock);
return rc;
}
EXPORT_SYMBOL_GPL(badrange_add);
void badrange_forget(struct badrange *badrange, phys_addr_t start,
unsigned int len)
{
struct list_head *badrange_list = &badrange->list;
u64 clr_end = start + len - 1;
struct badrange_entry *bre, *next;
spin_lock(&badrange->lock);
/*
* [start, clr_end] is the badrange interval being cleared.
* [bre->start, bre_end] is the badrange_list entry we're comparing
* the above interval against. The badrange list entry may need
* to be modified (update either start or length), deleted, or
* split into two based on the overlap characteristics
*/
list_for_each_entry_safe(bre, next, badrange_list, list) {
u64 bre_end = bre->start + bre->length - 1;
/* Skip intervals with no intersection */
if (bre_end < start)
continue;
if (bre->start > clr_end)
continue;
/* Delete completely overlapped badrange entries */
if ((bre->start >= start) && (bre_end <= clr_end)) {
list_del(&bre->list);
kfree(bre);
continue;
}
/* Adjust start point of partially cleared entries */
if ((start <= bre->start) && (clr_end > bre->start)) {
bre->length -= clr_end - bre->start + 1;
bre->start = clr_end + 1;
continue;
}
/* Adjust bre->length for partial clearing at the tail end */
if ((bre->start < start) && (bre_end <= clr_end)) {
/* bre->start remains the same */
bre->length = start - bre->start;
continue;
}
/*
* If clearing in the middle of an entry, we split it into
* two by modifying the current entry to represent one half of
* the split, and adding a new entry for the second half.
*/
if ((bre->start < start) && (bre_end > clr_end)) {
u64 new_start = clr_end + 1;
u64 new_len = bre_end - new_start + 1;
/* Add new entry covering the right half */
alloc_and_append_badrange_entry(badrange, new_start,
new_len, GFP_NOWAIT);
/* Adjust this entry to cover the left half */
bre->length = start - bre->start;
continue;
}
}
spin_unlock(&badrange->lock);
}
EXPORT_SYMBOL_GPL(badrange_forget);
static void set_badblock(struct badblocks *bb, sector_t s, int num)
{
dev_dbg(bb->dev, "Found a bad range (0x%llx, 0x%llx)\n",
(u64) s * 512, (u64) num * 512);
/* this isn't an error as the hardware will still throw an exception */
if (badblocks_set(bb, s, num, 1))
dev_info_once(bb->dev, "%s: failed for sector %llx\n",
__func__, (u64) s);
}
/**
* __add_badblock_range() - Convert a physical address range to bad sectors
* @bb: badblocks instance to populate
* @ns_offset: namespace offset where the error range begins (in bytes)
* @len: number of bytes of badrange to be added
*
* This assumes that the range provided with (ns_offset, len) is within
* the bounds of physical addresses for this namespace, i.e. lies in the
* interval [ns_start, ns_start + ns_size)
*/
static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
{
const unsigned int sector_size = 512;
sector_t start_sector, end_sector;
u64 num_sectors;
u32 rem;
start_sector = div_u64(ns_offset, sector_size);
end_sector = div_u64_rem(ns_offset + len, sector_size, &rem);
if (rem)
end_sector++;
num_sectors = end_sector - start_sector;
if (unlikely(num_sectors > (u64)INT_MAX)) {
u64 remaining = num_sectors;
sector_t s = start_sector;
while (remaining) {
int done = min_t(u64, remaining, INT_MAX);
set_badblock(bb, s, done);
remaining -= done;
s += done;
}
} else
set_badblock(bb, start_sector, num_sectors);
}
static void badblocks_populate(struct badrange *badrange,
struct badblocks *bb, const struct resource *res)
{
struct badrange_entry *bre;
if (list_empty(&badrange->list))
return;
list_for_each_entry(bre, &badrange->list, list) {
u64 bre_end = bre->start + bre->length - 1;
/* Discard intervals with no intersection */
if (bre_end < res->start)
continue;
if (bre->start > res->end)
continue;
/* Deal with any overlap after start of the namespace */
if (bre->start >= res->start) {
u64 start = bre->start;
u64 len;
if (bre_end <= res->end)
len = bre->length;
else
len = res->start + resource_size(res)
- bre->start;
__add_badblock_range(bb, start - res->start, len);
continue;
}
/*
* Deal with overlap for badrange starting before
* the namespace.
*/
if (bre->start < res->start) {
u64 len;
if (bre_end < res->end)
len = bre->start + bre->length - res->start;
else
len = resource_size(res);
__add_badblock_range(bb, 0, len);
}
}
}
/**
* nvdimm_badblocks_populate() - Convert a list of badranges to badblocks
* @region: parent region of the range to interrogate
* @bb: badblocks instance to populate
* @res: resource range to consider
*
* The badrange list generated during bus initialization may contain
* multiple, possibly overlapping physical address ranges. Compare each
* of these ranges to the resource range currently being initialized,
* and add badblocks entries for all matching sub-ranges
*/
void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res)
{
struct nvdimm_bus *nvdimm_bus;
if (!is_memory(&nd_region->dev)) {
dev_WARN_ONCE(&nd_region->dev, 1,
"%s only valid for pmem regions\n", __func__);
return;
}
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
nvdimm_bus_lock(&nvdimm_bus->dev);
badblocks_populate(&nvdimm_bus->badrange, bb, res);
nvdimm_bus_unlock(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
* General Public License for more details. * General Public License for more details.
*/ */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/libnvdimm.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -221,7 +222,7 @@ static void nvdimm_account_cleared_poison(struct nvdimm_bus *nvdimm_bus, ...@@ -221,7 +222,7 @@ static void nvdimm_account_cleared_poison(struct nvdimm_bus *nvdimm_bus,
phys_addr_t phys, u64 cleared) phys_addr_t phys, u64 cleared)
{ {
if (cleared > 0) if (cleared > 0)
nvdimm_forget_poison(nvdimm_bus, phys, cleared); badrange_forget(&nvdimm_bus->badrange, phys, cleared);
if (cleared > 0 && cleared / 512) if (cleared > 0 && cleared / 512)
nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared); nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared);
...@@ -344,11 +345,10 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, ...@@ -344,11 +345,10 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
return NULL; return NULL;
INIT_LIST_HEAD(&nvdimm_bus->list); INIT_LIST_HEAD(&nvdimm_bus->list);
INIT_LIST_HEAD(&nvdimm_bus->mapping_list); INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
INIT_LIST_HEAD(&nvdimm_bus->poison_list);
init_waitqueue_head(&nvdimm_bus->probe_wait); init_waitqueue_head(&nvdimm_bus->probe_wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
mutex_init(&nvdimm_bus->reconfig_mutex); mutex_init(&nvdimm_bus->reconfig_mutex);
spin_lock_init(&nvdimm_bus->poison_lock); badrange_init(&nvdimm_bus->badrange);
if (nvdimm_bus->id < 0) { if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus); kfree(nvdimm_bus);
return NULL; return NULL;
...@@ -395,15 +395,15 @@ static int child_unregister(struct device *dev, void *data) ...@@ -395,15 +395,15 @@ static int child_unregister(struct device *dev, void *data)
return 0; return 0;
} }
static void free_poison_list(struct list_head *poison_list) static void free_badrange_list(struct list_head *badrange_list)
{ {
struct nd_poison *pl, *next; struct badrange_entry *bre, *next;
list_for_each_entry_safe(pl, next, poison_list, list) { list_for_each_entry_safe(bre, next, badrange_list, list) {
list_del(&pl->list); list_del(&bre->list);
kfree(pl); kfree(bre);
} }
list_del_init(poison_list); list_del_init(badrange_list);
} }
static int nd_bus_remove(struct device *dev) static int nd_bus_remove(struct device *dev)
...@@ -417,9 +417,9 @@ static int nd_bus_remove(struct device *dev) ...@@ -417,9 +417,9 @@ static int nd_bus_remove(struct device *dev)
nd_synchronize(); nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
spin_lock(&nvdimm_bus->poison_lock); spin_lock(&nvdimm_bus->badrange.lock);
free_poison_list(&nvdimm_bus->poison_list); free_badrange_list(&nvdimm_bus->badrange.list);
spin_unlock(&nvdimm_bus->poison_lock); spin_unlock(&nvdimm_bus->badrange.lock);
nvdimm_bus_destroy_ndctl(nvdimm_bus); nvdimm_bus_destroy_ndctl(nvdimm_bus);
......
...@@ -398,265 +398,11 @@ struct attribute_group nvdimm_bus_attribute_group = { ...@@ -398,265 +398,11 @@ struct attribute_group nvdimm_bus_attribute_group = {
}; };
EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
static void set_badblock(struct badblocks *bb, sector_t s, int num) int nvdimm_bus_add_badrange(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{ {
dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n", return badrange_add(&nvdimm_bus->badrange, addr, length);
(u64) s * 512, (u64) num * 512);
/* this isn't an error as the hardware will still throw an exception */
if (badblocks_set(bb, s, num, 1))
dev_info_once(bb->dev, "%s: failed for sector %llx\n",
__func__, (u64) s);
} }
EXPORT_SYMBOL_GPL(nvdimm_bus_add_badrange);
/**
* __add_badblock_range() - Convert a physical address range to bad sectors
* @bb: badblocks instance to populate
* @ns_offset: namespace offset where the error range begins (in bytes)
* @len: number of bytes of poison to be added
*
* This assumes that the range provided with (ns_offset, len) is within
* the bounds of physical addresses for this namespace, i.e. lies in the
* interval [ns_start, ns_start + ns_size)
*/
static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
{
const unsigned int sector_size = 512;
sector_t start_sector, end_sector;
u64 num_sectors;
u32 rem;
start_sector = div_u64(ns_offset, sector_size);
end_sector = div_u64_rem(ns_offset + len, sector_size, &rem);
if (rem)
end_sector++;
num_sectors = end_sector - start_sector;
if (unlikely(num_sectors > (u64)INT_MAX)) {
u64 remaining = num_sectors;
sector_t s = start_sector;
while (remaining) {
int done = min_t(u64, remaining, INT_MAX);
set_badblock(bb, s, done);
remaining -= done;
s += done;
}
} else
set_badblock(bb, start_sector, num_sectors);
}
static void badblocks_populate(struct list_head *poison_list,
struct badblocks *bb, const struct resource *res)
{
struct nd_poison *pl;
if (list_empty(poison_list))
return;
list_for_each_entry(pl, poison_list, list) {
u64 pl_end = pl->start + pl->length - 1;
/* Discard intervals with no intersection */
if (pl_end < res->start)
continue;
if (pl->start > res->end)
continue;
/* Deal with any overlap after start of the namespace */
if (pl->start >= res->start) {
u64 start = pl->start;
u64 len;
if (pl_end <= res->end)
len = pl->length;
else
len = res->start + resource_size(res)
- pl->start;
__add_badblock_range(bb, start - res->start, len);
continue;
}
/* Deal with overlap for poison starting before the namespace */
if (pl->start < res->start) {
u64 len;
if (pl_end < res->end)
len = pl->start + pl->length - res->start;
else
len = resource_size(res);
__add_badblock_range(bb, 0, len);
}
}
}
/**
* nvdimm_badblocks_populate() - Convert a list of poison ranges to badblocks
* @region: parent region of the range to interrogate
* @bb: badblocks instance to populate
* @res: resource range to consider
*
* The poison list generated during bus initialization may contain
* multiple, possibly overlapping physical address ranges. Compare each
* of these ranges to the resource range currently being initialized,
* and add badblocks entries for all matching sub-ranges
*/
void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res)
{
struct nvdimm_bus *nvdimm_bus;
struct list_head *poison_list;
if (!is_memory(&nd_region->dev)) {
dev_WARN_ONCE(&nd_region->dev, 1,
"%s only valid for pmem regions\n", __func__);
return;
}
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
poison_list = &nvdimm_bus->poison_list;
nvdimm_bus_lock(&nvdimm_bus->dev);
badblocks_populate(poison_list, bb, res);
nvdimm_bus_unlock(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
static void append_poison_entry(struct nvdimm_bus *nvdimm_bus,
struct nd_poison *pl, u64 addr, u64 length)
{
lockdep_assert_held(&nvdimm_bus->poison_lock);
pl->start = addr;
pl->length = length;
list_add_tail(&pl->list, &nvdimm_bus->poison_list);
}
static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
gfp_t flags)
{
struct nd_poison *pl;
pl = kzalloc(sizeof(*pl), flags);
if (!pl)
return -ENOMEM;
append_poison_entry(nvdimm_bus, pl, addr, length);
return 0;
}
static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
struct nd_poison *pl, *pl_new;
spin_unlock(&nvdimm_bus->poison_lock);
pl_new = kzalloc(sizeof(*pl_new), GFP_KERNEL);
spin_lock(&nvdimm_bus->poison_lock);
if (list_empty(&nvdimm_bus->poison_list)) {
if (!pl_new)
return -ENOMEM;
append_poison_entry(nvdimm_bus, pl_new, addr, length);
return 0;
}
/*
* There is a chance this is a duplicate, check for those first.
* This will be the common case as ARS_STATUS returns all known
* errors in the SPA space, and we can't query it per region
*/
list_for_each_entry(pl, &nvdimm_bus->poison_list, list)
if (pl->start == addr) {
/* If length has changed, update this list entry */
if (pl->length != length)
pl->length = length;
kfree(pl_new);
return 0;
}
/*
* If not a duplicate or a simple length update, add the entry as is,
* as any overlapping ranges will get resolved when the list is consumed
* and converted to badblocks
*/
if (!pl_new)
return -ENOMEM;
append_poison_entry(nvdimm_bus, pl_new, addr, length);
return 0;
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
int rc;
spin_lock(&nvdimm_bus->poison_lock);
rc = bus_add_poison(nvdimm_bus, addr, length);
spin_unlock(&nvdimm_bus->poison_lock);
return rc;
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start,
unsigned int len)
{
struct list_head *poison_list = &nvdimm_bus->poison_list;
u64 clr_end = start + len - 1;
struct nd_poison *pl, *next;
spin_lock(&nvdimm_bus->poison_lock);
WARN_ON_ONCE(list_empty(poison_list));
/*
* [start, clr_end] is the poison interval being cleared.
* [pl->start, pl_end] is the poison_list entry we're comparing
* the above interval against. The poison list entry may need
* to be modified (update either start or length), deleted, or
* split into two based on the overlap characteristics
*/
list_for_each_entry_safe(pl, next, poison_list, list) {
u64 pl_end = pl->start + pl->length - 1;
/* Skip intervals with no intersection */
if (pl_end < start)
continue;
if (pl->start > clr_end)
continue;
/* Delete completely overlapped poison entries */
if ((pl->start >= start) && (pl_end <= clr_end)) {
list_del(&pl->list);
kfree(pl);
continue;
}
/* Adjust start point of partially cleared entries */
if ((start <= pl->start) && (clr_end > pl->start)) {
pl->length -= clr_end - pl->start + 1;
pl->start = clr_end + 1;
continue;
}
/* Adjust pl->length for partial clearing at the tail end */
if ((pl->start < start) && (pl_end <= clr_end)) {
/* pl->start remains the same */
pl->length = start - pl->start;
continue;
}
/*
* If clearing in the middle of an entry, we split it into
* two by modifying the current entry to represent one half of
* the split, and adding a new entry for the second half.
*/
if ((pl->start < start) && (pl_end > clr_end)) {
u64 new_start = clr_end + 1;
u64 new_len = pl_end - new_start + 1;
/* Add new entry covering the right half */
add_poison(nvdimm_bus, new_start, new_len, GFP_NOWAIT);
/* Adjust this entry to cover the left half */
pl->length = start - pl->start;
continue;
}
}
spin_unlock(&nvdimm_bus->poison_lock);
}
EXPORT_SYMBOL_GPL(nvdimm_forget_poison);
#ifdef CONFIG_BLK_DEV_INTEGRITY #ifdef CONFIG_BLK_DEV_INTEGRITY
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
......
...@@ -55,6 +55,8 @@ static int nvdimm_probe(struct device *dev) ...@@ -55,6 +55,8 @@ static int nvdimm_probe(struct device *dev)
goto err; goto err;
rc = nvdimm_init_config_data(ndd); rc = nvdimm_init_config_data(ndd);
if (rc == -EACCES)
nvdimm_set_locked(dev);
if (rc) if (rc)
goto err; goto err;
...@@ -68,6 +70,7 @@ static int nvdimm_probe(struct device *dev) ...@@ -68,6 +70,7 @@ static int nvdimm_probe(struct device *dev)
rc = nd_label_reserve_dpa(ndd); rc = nd_label_reserve_dpa(ndd);
if (ndd->ns_current >= 0) if (ndd->ns_current >= 0)
nvdimm_set_aliasing(dev); nvdimm_set_aliasing(dev);
nvdimm_clear_locked(dev);
nvdimm_bus_unlock(dev); nvdimm_bus_unlock(dev);
if (rc) if (rc)
......
...@@ -200,6 +200,13 @@ void nvdimm_set_locked(struct device *dev) ...@@ -200,6 +200,13 @@ void nvdimm_set_locked(struct device *dev)
set_bit(NDD_LOCKED, &nvdimm->flags); set_bit(NDD_LOCKED, &nvdimm->flags);
} }
void nvdimm_clear_locked(struct device *dev)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
clear_bit(NDD_LOCKED, &nvdimm->flags);
}
static void nvdimm_release(struct device *dev) static void nvdimm_release(struct device *dev)
{ {
struct nvdimm *nvdimm = to_nvdimm(dev); struct nvdimm *nvdimm = to_nvdimm(dev);
...@@ -324,6 +331,17 @@ static ssize_t commands_show(struct device *dev, ...@@ -324,6 +331,17 @@ static ssize_t commands_show(struct device *dev,
} }
static DEVICE_ATTR_RO(commands); static DEVICE_ATTR_RO(commands);
static ssize_t flags_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
return sprintf(buf, "%s%s\n",
test_bit(NDD_ALIASING, &nvdimm->flags) ? "alias " : "",
test_bit(NDD_LOCKED, &nvdimm->flags) ? "lock " : "");
}
static DEVICE_ATTR_RO(flags);
static ssize_t state_show(struct device *dev, struct device_attribute *attr, static ssize_t state_show(struct device *dev, struct device_attribute *attr,
char *buf) char *buf)
{ {
...@@ -365,6 +383,7 @@ static DEVICE_ATTR_RO(available_slots); ...@@ -365,6 +383,7 @@ static DEVICE_ATTR_RO(available_slots);
static struct attribute *nvdimm_attributes[] = { static struct attribute *nvdimm_attributes[] = {
&dev_attr_state.attr, &dev_attr_state.attr,
&dev_attr_flags.attr,
&dev_attr_commands.attr, &dev_attr_commands.attr,
&dev_attr_available_slots.attr, &dev_attr_available_slots.attr,
NULL, NULL,
......
...@@ -1050,7 +1050,7 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels) ...@@ -1050,7 +1050,7 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
nsindex = to_namespace_index(ndd, 0); nsindex = to_namespace_index(ndd, 0);
memset(nsindex, 0, ndd->nsarea.config_size); memset(nsindex, 0, ndd->nsarea.config_size);
for (i = 0; i < 2; i++) { for (i = 0; i < 2; i++) {
int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT); int rc = nd_label_write_index(ndd, i, 3 - i, ND_NSINDEX_INIT);
if (rc) if (rc)
return rc; return rc;
......
...@@ -1620,7 +1620,7 @@ static umode_t namespace_visible(struct kobject *kobj, ...@@ -1620,7 +1620,7 @@ static umode_t namespace_visible(struct kobject *kobj,
if (a == &dev_attr_resource.attr) { if (a == &dev_attr_resource.attr) {
if (is_namespace_blk(dev)) if (is_namespace_blk(dev))
return 0; return 0;
return a->mode; return 0400;
} }
if (is_namespace_pmem(dev) || is_namespace_blk(dev)) { if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
...@@ -1875,7 +1875,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) ...@@ -1875,7 +1875,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
* @nspm: target namespace to create * @nspm: target namespace to create
* @nd_label: target pmem namespace label to evaluate * @nd_label: target pmem namespace label to evaluate
*/ */
struct device *create_namespace_pmem(struct nd_region *nd_region, static struct device *create_namespace_pmem(struct nd_region *nd_region,
struct nd_namespace_index *nsindex, struct nd_namespace_index *nsindex,
struct nd_namespace_label *nd_label) struct nd_namespace_label *nd_label)
{ {
...@@ -2186,7 +2186,7 @@ static int add_namespace_resource(struct nd_region *nd_region, ...@@ -2186,7 +2186,7 @@ static int add_namespace_resource(struct nd_region *nd_region,
return i; return i;
} }
struct device *create_namespace_blk(struct nd_region *nd_region, static struct device *create_namespace_blk(struct nd_region *nd_region,
struct nd_namespace_label *nd_label, int count) struct nd_namespace_label *nd_label, int count)
{ {
......
...@@ -29,10 +29,9 @@ struct nvdimm_bus { ...@@ -29,10 +29,9 @@ struct nvdimm_bus {
struct list_head list; struct list_head list;
struct device dev; struct device dev;
int id, probe_active; int id, probe_active;
struct list_head poison_list;
struct list_head mapping_list; struct list_head mapping_list;
struct mutex reconfig_mutex; struct mutex reconfig_mutex;
spinlock_t poison_lock; struct badrange badrange;
}; };
struct nvdimm { struct nvdimm {
......
...@@ -34,12 +34,6 @@ enum { ...@@ -34,12 +34,6 @@ enum {
NVDIMM_IO_ATOMIC = 1, NVDIMM_IO_ATOMIC = 1,
}; };
struct nd_poison {
u64 start;
u64 length;
struct list_head list;
};
struct nvdimm_drvdata { struct nvdimm_drvdata {
struct device *dev; struct device *dev;
int nslabel_size; int nslabel_size;
...@@ -254,6 +248,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, ...@@ -254,6 +248,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
unsigned int len); unsigned int len);
void nvdimm_set_aliasing(struct device *dev); void nvdimm_set_aliasing(struct device *dev);
void nvdimm_set_locked(struct device *dev); void nvdimm_set_locked(struct device *dev);
void nvdimm_clear_locked(struct device *dev);
struct nd_btt *to_nd_btt(struct device *dev); struct nd_btt *to_nd_btt(struct device *dev);
struct nd_gen_sb { struct nd_gen_sb {
......
...@@ -282,8 +282,16 @@ static struct attribute *nd_pfn_attributes[] = { ...@@ -282,8 +282,16 @@ static struct attribute *nd_pfn_attributes[] = {
NULL, NULL,
}; };
static umode_t pfn_visible(struct kobject *kobj, struct attribute *a, int n)
{
if (a == &dev_attr_resource.attr)
return 0400;
return a->mode;
}
struct attribute_group nd_pfn_attribute_group = { struct attribute_group nd_pfn_attribute_group = {
.attrs = nd_pfn_attributes, .attrs = nd_pfn_attributes,
.is_visible = pfn_visible,
}; };
static const struct attribute_group *nd_pfn_attribute_groups[] = { static const struct attribute_group *nd_pfn_attribute_groups[] = {
......
...@@ -562,8 +562,12 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) ...@@ -562,8 +562,12 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr)
return 0; return 0;
if (!is_nd_pmem(dev) && a == &dev_attr_resource.attr) if (a == &dev_attr_resource.attr) {
return 0; if (is_nd_pmem(dev))
return 0400;
else
return 0;
}
if (a == &dev_attr_deep_flush.attr) { if (a == &dev_attr_deep_flush.attr) {
int has_flush = nvdimm_has_flush(nd_region); int has_flush = nvdimm_has_flush(nd_region);
......
This diff is collapsed.
...@@ -100,7 +100,7 @@ static int ext2_dax_fault(struct vm_fault *vmf) ...@@ -100,7 +100,7 @@ static int ext2_dax_fault(struct vm_fault *vmf)
} }
down_read(&ei->dax_sem); down_read(&ei->dax_sem);
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &ext2_iomap_ops); ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, &ext2_iomap_ops);
up_read(&ei->dax_sem); up_read(&ei->dax_sem);
if (vmf->flags & FAULT_FLAG_WRITE) if (vmf->flags & FAULT_FLAG_WRITE)
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/quotaops.h> #include <linux/quotaops.h>
#include <linux/pagevec.h> #include <linux/pagevec.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/mman.h>
#include "ext4.h" #include "ext4.h"
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "xattr.h" #include "xattr.h"
...@@ -297,6 +298,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, ...@@ -297,6 +298,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
*/ */
bool write = (vmf->flags & FAULT_FLAG_WRITE) && bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
(vmf->vma->vm_flags & VM_SHARED); (vmf->vma->vm_flags & VM_SHARED);
pfn_t pfn;
if (write) { if (write) {
sb_start_pagefault(sb); sb_start_pagefault(sb);
...@@ -304,16 +306,20 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf, ...@@ -304,16 +306,20 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
down_read(&EXT4_I(inode)->i_mmap_sem); down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb)); EXT4_DATA_TRANS_BLOCKS(sb));
if (IS_ERR(handle)) {
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
return VM_FAULT_SIGBUS;
}
} else { } else {
down_read(&EXT4_I(inode)->i_mmap_sem); down_read(&EXT4_I(inode)->i_mmap_sem);
} }
if (!IS_ERR(handle)) result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops);
result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
else
result = VM_FAULT_SIGBUS;
if (write) { if (write) {
if (!IS_ERR(handle)) ext4_journal_stop(handle);
ext4_journal_stop(handle); /* Handling synchronous page fault? */
if (result & VM_FAULT_NEEDDSYNC)
result = dax_finish_sync_fault(vmf, pe_size, pfn);
up_read(&EXT4_I(inode)->i_mmap_sem); up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb); sb_end_pagefault(sb);
} else { } else {
...@@ -351,6 +357,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -351,6 +357,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO; return -EIO;
/*
* We don't support synchronous mappings for non-DAX files. At least
* until someone comes with a sensible use case.
*/
if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
return -EOPNOTSUPP;
file_accessed(file); file_accessed(file);
if (IS_DAX(file_inode(file))) { if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops; vma->vm_ops = &ext4_dax_vm_ops;
...@@ -469,6 +482,7 @@ const struct file_operations ext4_file_operations = { ...@@ -469,6 +482,7 @@ const struct file_operations ext4_file_operations = {
.compat_ioctl = ext4_compat_ioctl, .compat_ioctl = ext4_compat_ioctl,
#endif #endif
.mmap = ext4_file_mmap, .mmap = ext4_file_mmap,
.mmap_supported_flags = MAP_SYNC,
.open = ext4_file_open, .open = ext4_file_open,
.release = ext4_release_file, .release = ext4_release_file,
.fsync = ext4_sync_file, .fsync = ext4_sync_file,
......
...@@ -3384,6 +3384,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait) ...@@ -3384,6 +3384,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page); return try_to_free_buffers(page);
} }
static bool ext4_inode_datasync_dirty(struct inode *inode)
{
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
if (journal)
return !jbd2_transaction_committed(journal,
EXT4_I(inode)->i_datasync_tid);
/* Any metadata buffers to write? */
if (!list_empty(&inode->i_mapping->private_list))
return true;
return inode->i_state & I_DIRTY_DATASYNC;
}
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap) unsigned flags, struct iomap *iomap)
{ {
...@@ -3497,6 +3510,8 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, ...@@ -3497,6 +3510,8 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
} }
iomap->flags = 0; iomap->flags = 0;
if (ext4_inode_datasync_dirty(inode))
iomap->flags |= IOMAP_F_DIRTY;
iomap->bdev = inode->i_sb->s_bdev; iomap->bdev = inode->i_sb->s_bdev;
iomap->dax_dev = sbi->s_daxdev; iomap->dax_dev = sbi->s_daxdev;
iomap->offset = first_block << blkbits; iomap->offset = first_block << blkbits;
......
...@@ -737,6 +737,23 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) ...@@ -737,6 +737,23 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
return err; return err;
} }
/* Return 1 when transaction with given tid has already committed. */
int jbd2_transaction_committed(journal_t *journal, tid_t tid)
{
int ret = 1;
read_lock(&journal->j_state_lock);
if (journal->j_running_transaction &&
journal->j_running_transaction->t_tid == tid)
ret = 0;
if (journal->j_committing_transaction &&
journal->j_committing_transaction->t_tid == tid)
ret = 0;
read_unlock(&journal->j_state_lock);
return ret;
}
EXPORT_SYMBOL(jbd2_transaction_committed);
/* /*
* When this function returns the transaction corresponding to tid * When this function returns the transaction corresponding to tid
* will be completed. If the transaction has currently running, start * will be completed. If the transaction has currently running, start
......
...@@ -661,6 +661,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) ...@@ -661,6 +661,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_ACCOUNT)] = "ac", [ilog2(VM_ACCOUNT)] = "ac",
[ilog2(VM_NORESERVE)] = "nr", [ilog2(VM_NORESERVE)] = "nr",
[ilog2(VM_HUGETLB)] = "ht", [ilog2(VM_HUGETLB)] = "ht",
[ilog2(VM_SYNC)] = "sf",
[ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_ARCH_1)] = "ar",
[ilog2(VM_WIPEONFORK)] = "wf", [ilog2(VM_WIPEONFORK)] = "wf",
[ilog2(VM_DONTDUMP)] = "dd", [ilog2(VM_DONTDUMP)] = "dd",
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include <linux/falloc.h> #include <linux/falloc.h>
#include <linux/pagevec.h> #include <linux/pagevec.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/mman.h>
static const struct vm_operations_struct xfs_file_vm_ops; static const struct vm_operations_struct xfs_file_vm_ops;
...@@ -1045,7 +1046,11 @@ __xfs_filemap_fault( ...@@ -1045,7 +1046,11 @@ __xfs_filemap_fault(
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) { if (IS_DAX(inode)) {
ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops); pfn_t pfn;
ret = dax_iomap_fault(vmf, pe_size, &pfn, &xfs_iomap_ops);
if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
} else { } else {
if (write_fault) if (write_fault)
ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
...@@ -1090,37 +1095,16 @@ xfs_filemap_page_mkwrite( ...@@ -1090,37 +1095,16 @@ xfs_filemap_page_mkwrite(
} }
/* /*
* pfn_mkwrite was originally inteneded to ensure we capture time stamp * pfn_mkwrite was originally intended to ensure we capture time stamp updates
* updates on write faults. In reality, it's need to serialise against * on write faults. In reality, it needs to serialise against truncate and
* truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED * prepare memory for writing so handle is as standard write fault.
* to ensure we serialise the fault barrier in place.
*/ */
static int static int
xfs_filemap_pfn_mkwrite( xfs_filemap_pfn_mkwrite(
struct vm_fault *vmf) struct vm_fault *vmf)
{ {
struct inode *inode = file_inode(vmf->vma->vm_file); return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
struct xfs_inode *ip = XFS_I(inode);
int ret = VM_FAULT_NOPAGE;
loff_t size;
trace_xfs_filemap_pfn_mkwrite(ip);
sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
/* check if the faulting page hasn't raced with truncate */
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
else if (IS_DAX(inode))
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
} }
static const struct vm_operations_struct xfs_file_vm_ops = { static const struct vm_operations_struct xfs_file_vm_ops = {
...@@ -1136,6 +1120,13 @@ xfs_file_mmap( ...@@ -1136,6 +1120,13 @@ xfs_file_mmap(
struct file *filp, struct file *filp,
struct vm_area_struct *vma) struct vm_area_struct *vma)
{ {
/*
* We don't support synchronous mappings for non-DAX files. At least
* until someone comes with a sensible use case.
*/
if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
return -EOPNOTSUPP;
file_accessed(filp); file_accessed(filp);
vma->vm_ops = &xfs_file_vm_ops; vma->vm_ops = &xfs_file_vm_ops;
if (IS_DAX(file_inode(filp))) if (IS_DAX(file_inode(filp)))
...@@ -1154,6 +1145,7 @@ const struct file_operations xfs_file_operations = { ...@@ -1154,6 +1145,7 @@ const struct file_operations xfs_file_operations = {
.compat_ioctl = xfs_file_compat_ioctl, .compat_ioctl = xfs_file_compat_ioctl,
#endif #endif
.mmap = xfs_file_mmap, .mmap = xfs_file_mmap,
.mmap_supported_flags = MAP_SYNC,
.open = xfs_file_open, .open = xfs_file_open,
.release = xfs_file_release, .release = xfs_file_release,
.fsync = xfs_file_fsync, .fsync = xfs_file_fsync,
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include "xfs_error.h" #include "xfs_error.h"
#include "xfs_trans.h" #include "xfs_trans.h"
#include "xfs_trans_space.h" #include "xfs_trans_space.h"
#include "xfs_inode_item.h"
#include "xfs_iomap.h" #include "xfs_iomap.h"
#include "xfs_trace.h" #include "xfs_trace.h"
#include "xfs_icache.h" #include "xfs_icache.h"
...@@ -1089,6 +1090,10 @@ xfs_file_iomap_begin( ...@@ -1089,6 +1090,10 @@ xfs_file_iomap_begin(
trace_xfs_iomap_found(ip, offset, length, 0, &imap); trace_xfs_iomap_found(ip, offset, length, 0, &imap);
} }
if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields
& ~XFS_ILOG_TIMESTAMP))
iomap->flags |= IOMAP_F_DIRTY;
xfs_bmbt_to_iomap(ip, iomap, &imap); xfs_bmbt_to_iomap(ip, iomap, &imap);
if (shared) if (shared)
......
...@@ -654,8 +654,6 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag); ...@@ -654,8 +654,6 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid);
DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
TRACE_EVENT(xfs_filemap_fault, TRACE_EVENT(xfs_filemap_fault,
TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size, TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size,
bool write_fault), bool write_fault),
......
@@ -96,7 +96,9 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev);
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops);
 int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
-		const struct iomap_ops *ops);
+		pfn_t *pfnp, const struct iomap_ops *ops);
+int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+		pfn_t pfn);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 		pgoff_t index);
...
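dax_iomap_fault() now reports the pfn it resolved, and dax_finish_sync_fault() is what a filesystem calls after seeing VM_FAULT_NEEDDSYNC. Its job is small enough to sketch; the body below is a simplified reconstruction, not the literal fs/dax.c code:

int dax_finish_sync_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size, pfn_t pfn)
{
	int err;
	loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
	size_t len = 0;

	if (pe_size == PE_SIZE_PTE)
		len = PAGE_SIZE;
	else if (pe_size == PE_SIZE_PMD)
		len = PMD_SIZE;
	else
		WARN_ON_ONCE(1);

	/* commit the metadata needed to reach the faulted range ... */
	err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
	if (err)
		return VM_FAULT_SIGBUS;
	/* ... and only then install a writeable PTE/PMD for the pfn */
	return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
}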
@@ -1702,6 +1702,7 @@ struct file_operations {
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
+	unsigned long mmap_supported_flags;
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *, fl_owner_t id);
 	int (*release) (struct inode *, struct file *);
...
@@ -21,9 +21,13 @@ struct vm_fault;
 /*
  * Flags for all iomap mappings:
+ *
+ * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access
+ * written data and requires fdatasync to commit them to persistent storage.
  */
 #define IOMAP_F_NEW	0x01	/* blocks have been newly allocated */
 #define IOMAP_F_BOUNDARY 0x02	/* mapping ends at metadata boundary */
+#define IOMAP_F_DIRTY	0x04	/* uncommitted metadata */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
...
@@ -1367,6 +1367,7 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid);
 int __jbd2_log_start_commit(journal_t *journal, tid_t tid);
 int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
 int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
+int jbd2_transaction_committed(journal_t *journal, tid_t tid);
 int jbd2_complete_transaction(journal_t *journal, tid_t tid);
 int jbd2_log_do_checkpoint(journal_t *journal);
 int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
...
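jbd2_transaction_committed() lets ext4 answer "does this inode still have metadata waiting on a journal commit?" when deciding whether to report IOMAP_F_DIRTY. A sketch of that check, with the helper name and the no-journal fallback assumed from the ext4 patches in this series:

static bool ext4_inode_datasync_dirty(struct inode *inode)
{
	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;

	if (journal)
		/* dirty until the transaction carrying our metadata commits */
		return !jbd2_transaction_committed(journal,
					EXT4_I(inode)->i_datasync_tid);

	/* no journal: fall back to the usual dirty tracking */
	if (!list_empty(&inode->i_mapping->private_list))
		return true;
	return inode->i_state & I_DIRTY_DATASYNC;
}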
@@ -18,6 +18,18 @@
 #include <linux/sizes.h>
 #include <linux/types.h>
 #include <linux/uuid.h>
+#include <linux/spinlock.h>
+
+struct badrange_entry {
+	u64 start;
+	u64 length;
+	struct list_head list;
+};
+
+struct badrange {
+	struct list_head list;
+	spinlock_t lock;
+};
 
 enum {
 	/* when a dimm supports both PMEM and BLK access a label is required */
@@ -129,9 +141,12 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }
 
-int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
-void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus,
-		phys_addr_t start, unsigned int len);
+void badrange_init(struct badrange *badrange);
+int badrange_add(struct badrange *badrange, u64 addr, u64 length);
+void badrange_forget(struct badrange *badrange, phys_addr_t start,
+		unsigned int len);
+int nvdimm_bus_add_badrange(struct nvdimm_bus *nvdimm_bus, u64 addr,
+		u64 length);
 struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
...
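The poison-tracking helpers are generalised into a standalone "badrange" object so that the ACPI error-injection paths and the unit tests can maintain their own lists. A minimal usage sketch; the function names other than the API above, the address, and the error handling are made up:

static int example_track_media_error(struct nvdimm_bus *nvdimm_bus,
		u64 spa, u64 len)
{
	/* records the range against the bus-wide badrange list */
	return nvdimm_bus_add_badrange(nvdimm_bus, spa, len);
}

static void example_private_badrange(void)
{
	struct badrange br;

	badrange_init(&br);
	if (badrange_add(&br, 0x100000000ULL, SZ_4K))
		return;			/* allocation failure assumed */
	/* ... walk br.list under br.lock ... */
	badrange_forget(&br, 0x100000000ULL, SZ_4K);
}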
@@ -199,6 +199,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
+#define VM_SYNC		0x00800000	/* Synchronous page faults */
 #define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
 #define VM_WIPEONFORK	0x02000000	/* Wipe VMA contents in child. */
 #define VM_DONTDUMP	0x04000000	/* Do not include in the core dump */
@@ -1191,8 +1192,9 @@ static inline void clear_page_pfmemalloc(struct page *page)
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
 #define VM_FAULT_DONE_COW   0x1000	/* ->fault has fully handled COW */
+#define VM_FAULT_NEEDDSYNC  0x2000	/* ->fault did not modify page tables
+					 * and needs fsync() to complete (for
+					 * synchronous page faults in DAX) */
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 
 #define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \
 			 VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \
@@ -1210,7 +1212,8 @@ static inline void clear_page_pfmemalloc(struct page *page)
 	{ VM_FAULT_LOCKED,		"LOCKED" }, \
 	{ VM_FAULT_RETRY,		"RETRY" }, \
 	{ VM_FAULT_FALLBACK,		"FALLBACK" }, \
-	{ VM_FAULT_DONE_COW,		"DONE_COW" }
+	{ VM_FAULT_DONE_COW,		"DONE_COW" }, \
+	{ VM_FAULT_NEEDDSYNC,		"NEEDDSYNC" }
 
 /* Encode hstate index for a hwpoisoned large page */
 #define VM_FAULT_SET_HINDEX(x) ((x) << 12)
...
@@ -8,6 +8,48 @@
 #include <linux/atomic.h>
 #include <uapi/linux/mman.h>
 
+/*
+ * Arrange for legacy / undefined architecture specific flags to be
+ * ignored by mmap handling code.
+ */
+#ifndef MAP_32BIT
+#define MAP_32BIT 0
+#endif
+#ifndef MAP_HUGE_2MB
+#define MAP_HUGE_2MB 0
+#endif
+#ifndef MAP_HUGE_1GB
+#define MAP_HUGE_1GB 0
+#endif
+#ifndef MAP_UNINITIALIZED
+#define MAP_UNINITIALIZED 0
+#endif
+#ifndef MAP_SYNC
+#define MAP_SYNC 0
+#endif
+
+/*
+ * The historical set of flags that all mmap implementations implicitly
+ * support when a ->mmap_validate() op is not provided in file_operations.
+ */
+#define LEGACY_MAP_MASK (MAP_SHARED \
+		| MAP_PRIVATE \
+		| MAP_FIXED \
+		| MAP_ANONYMOUS \
+		| MAP_DENYWRITE \
+		| MAP_EXECUTABLE \
+		| MAP_UNINITIALIZED \
+		| MAP_GROWSDOWN \
+		| MAP_LOCKED \
+		| MAP_NORESERVE \
+		| MAP_POPULATE \
+		| MAP_NONBLOCK \
+		| MAP_STACK \
+		| MAP_HUGETLB \
+		| MAP_32BIT \
+		| MAP_HUGE_2MB \
+		| MAP_HUGE_1GB)
+
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
 extern unsigned long sysctl_overcommit_kbytes;
@@ -64,8 +106,9 @@ static inline bool arch_validate_prot(unsigned long prot)
  * ("bit1" and "bit2" must be single bits)
  */
 #define _calc_vm_trans(x, bit1, bit2) \
+	((!(bit1) || !(bit2)) ? 0 : \
 	((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
-	 : ((x) & (bit1)) / ((bit1) / (bit2)))
+	 : ((x) & (bit1)) / ((bit1) / (bit2))))
 
 /*
  * Combine the mmap "prot" argument into "vm_flags" used internally.
@@ -87,7 +130,8 @@ calc_vm_flag_bits(unsigned long flags)
 {
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
 	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
-	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
+	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
+	       _calc_vm_trans(flags, MAP_SYNC,       VM_SYNC      );
 }
 
 unsigned long vm_commit_limit(void);
...
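The new `(!(bit1) || !(bit2)) ? 0 :` guard in _calc_vm_trans() exists because MAP_SYNC (and the other fallback defines above) may be 0 on architectures that do not provide the flag, and the old macro would then divide by zero. A small userspace check of the translation, with the asm-generic values assumed:

/* Userspace demonstration of the flag-translation macro; the flag
 * values are assumed from the uapi hunks in this series. */
#include <assert.h>

#define MAP_SYNC	0x80000
#define VM_SYNC		0x00800000

#define _calc_vm_trans(x, bit1, bit2) \
	((!(bit1) || !(bit2)) ? 0 : \
	((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
	 : ((x) & (bit1)) / ((bit1) / (bit2))))

int main(void)
{
	/* MAP_SYNC (0x80000) is scaled up by 0x800000/0x80000 = 16 */
	assert(_calc_vm_trans(MAP_SYNC, MAP_SYNC, VM_SYNC) == VM_SYNC);
	/* flag not requested: nothing set */
	assert(_calc_vm_trans(0, MAP_SYNC, VM_SYNC) == 0);
	return 0;
}

With MAP_SYNC defined to 0, the guard short-circuits to 0, so calc_vm_flag_bits() simply never sets VM_SYNC on such architectures.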
@@ -149,7 +149,6 @@ DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \
 	TP_ARGS(inode, vmf, length, pfn, radix_entry))
 
 DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping);
-DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback);
 
 DECLARE_EVENT_CLASS(dax_pte_fault_class,
 	TP_PROTO(struct inode *inode, struct vm_fault *vmf, int result),
@@ -192,6 +191,8 @@ DEFINE_EVENT(dax_pte_fault_class, name, \
 DEFINE_PTE_FAULT_EVENT(dax_pte_fault);
 DEFINE_PTE_FAULT_EVENT(dax_pte_fault_done);
 DEFINE_PTE_FAULT_EVENT(dax_load_hole);
+DEFINE_PTE_FAULT_EVENT(dax_insert_pfn_mkwrite_no_entry);
+DEFINE_PTE_FAULT_EVENT(dax_insert_pfn_mkwrite);
 
 TRACE_EVENT(dax_insert_mapping,
 	TP_PROTO(struct inode *inode, struct vm_fault *vmf, void *radix_entry),
...
@@ -17,6 +17,7 @@
 #define MAP_SHARED	0x01		/* Share changes */
 #define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03	/* share + validate extension flags */
 #define MAP_TYPE	0x0f		/* Mask for type of mapping */
 #define MAP_FIXED	0x10		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x20		/* don't use a file */
...
@@ -13,6 +13,7 @@
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x40000		/* create a huge page mapping */
+#define MAP_SYNC	0x80000		/* perform synchronous page faults for the mapping */
 
 /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
...
@@ -1387,9 +1387,24 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (file) {
 		struct inode *inode = file_inode(file);
+		unsigned long flags_mask;
+
+		flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
 
 		switch (flags & MAP_TYPE) {
 		case MAP_SHARED:
+			/*
+			 * Force use of MAP_SHARED_VALIDATE with non-legacy
+			 * flags. E.g. MAP_SYNC is dangerous to use with
+			 * MAP_SHARED as you don't know which consistency model
+			 * you will get. We silently ignore unsupported flags
+			 * with MAP_SHARED to preserve backward compatibility.
+			 */
+			flags &= LEGACY_MAP_MASK;
+			/* fall through */
+		case MAP_SHARED_VALIDATE:
+			if (flags & ~flags_mask)
+				return -EOPNOTSUPP;
 			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
 				return -EACCES;
...
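Tying the pieces together, the check above behaves as follows for a filesystem that sets .mmap_supported_flags = MAP_SYNC (the xfs hunk earlier) versus a driver that leaves it zero; a worked example:

/*
 * Worked example of the do_mmap() validation above:
 *
 *   mmap(..., MAP_SHARED | MAP_SYNC, ...)
 *       -> case MAP_SHARED: flags &= LEGACY_MAP_MASK silently drops
 *          MAP_SYNC; the mapping is created without synchronous
 *          faults, preserving the old ABI.
 *
 *   mmap(..., MAP_SHARED_VALIDATE | MAP_SYNC, ...) on xfs
 *       -> flags_mask = LEGACY_MAP_MASK | MAP_SYNC, nothing is left
 *          over, so MAP_SYNC survives and is later translated to
 *          VM_SYNC by calc_vm_flag_bits().
 *
 *   mmap(..., MAP_SHARED_VALIDATE | MAP_SYNC, ...) on a file whose
 *   f_op->mmap_supported_flags == 0
 *       -> flags & ~flags_mask is nonzero, do_mmap() returns
 *          -EOPNOTSUPP.
 */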
@@ -17,6 +17,7 @@
 #define MAP_SHARED	0x01		/* Share changes */
 #define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03	/* share + validate extension flags */
 #define MAP_TYPE	0x0f		/* Mask for type of mapping */
 #define MAP_FIXED	0x10		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x20		/* don't use a file */
...
@@ -70,6 +70,7 @@ libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
 libnvdimm-y += $(NVDIMM_SRC)/region.o
 libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
 libnvdimm-y += $(NVDIMM_SRC)/label.o
+libnvdimm-y += $(NVDIMM_SRC)/badrange.o
 libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
 libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
 libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
...
@@ -32,6 +32,58 @@ struct nfit_test_resource {
 	void *buf;
 };
 
+#define ND_TRANSLATE_SPA_STATUS_INVALID_SPA  2
+#define NFIT_ARS_INJECT_INVALID 2
+
+enum err_inj_options {
+	ND_ARS_ERR_INJ_OPT_NOTIFY = 0,
+};
+
+/* nfit commands */
+enum nfit_cmd_num {
+	NFIT_CMD_TRANSLATE_SPA = 5,
+	NFIT_CMD_ARS_INJECT_SET = 7,
+	NFIT_CMD_ARS_INJECT_CLEAR = 8,
+	NFIT_CMD_ARS_INJECT_GET = 9,
+};
+
+struct nd_cmd_translate_spa {
+	__u64 spa;
+	__u32 status;
+	__u8  flags;
+	__u8  _reserved[3];
+	__u64 translate_length;
+	__u32 num_nvdimms;
+	struct nd_nvdimm_device {
+		__u32 nfit_device_handle;
+		__u32 _reserved;
+		__u64 dpa;
+	} __packed devices[0];
+} __packed;
+
+struct nd_cmd_ars_err_inj {
+	__u64 err_inj_spa_range_base;
+	__u64 err_inj_spa_range_length;
+	__u8  err_inj_options;
+	__u32 status;
+} __packed;
+
+struct nd_cmd_ars_err_inj_clr {
+	__u64 err_inj_clr_spa_range_base;
+	__u64 err_inj_clr_spa_range_length;
+	__u32 status;
+} __packed;
+
+struct nd_cmd_ars_err_inj_stat {
+	__u32 status;
+	__u32 inj_err_rec_count;
+	struct nd_error_stat_query_record {
+		__u64 err_inj_stat_spa_range_base;
+		__u64 err_inj_stat_spa_range_length;
+	} __packed record[0];
+} __packed;
+
 union acpi_object;
 typedef void *acpi_handle;
...
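These layouts mirror the ACPI 6.2 Translate SPA and ARS error-injection DSMs that the mocked NFIT now implements. A hypothetical caller would fill the injection payload along these lines; the wrapping in an ND_CMD_CALL package and the submission path are omitted and assumed:

static void example_fill_err_inj(struct nd_cmd_ars_err_inj *inj,
		u64 spa, u64 len, bool notify)
{
	/* inject a media error covering [spa, spa + len) */
	inj->err_inj_spa_range_base = spa;
	inj->err_inj_spa_range_length = len;
	inj->err_inj_options = notify ? (1 << ND_ARS_ERR_INJ_OPT_NOTIFY) : 0;
	inj->status = 0;	/* filled in by the (mocked) DSM on return */
}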