Commit d09ba131 authored by Linus Torvalds

Merge tag 'libnvdimm-for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "Aside from the recently added pmem sub-division support these have
  been in -next for several releases with no reported issues. The sub-
  division support was included in next-20161010 with no reported
  issues. It passes all unit tests including new tests for all the new
  functionality below.

  Summary:

   - PMEM sub-division support: Allow a single PMEM region to be divided
     into multiple namespaces. Originally, ~2 years ago, it was thought
     that partitions of a /dev/pmemX block device could handle
     sub-allocations of persistent memory for different use cases. With
     the decision to not support DAX mappings of raw block-devices, and
     the genesis of device-dax, the need for having multiple
     pmem-namespaces per region has grown.

   - Device-DAX unified inode: In support of dynamic-resizing of a
     device-dax instance the kernel arranges for all mappings of a
     device-dax node to share the same inode. This allows unmap /
     truncate / invalidation events to affect all instances of the
     device similar to the behavior of mmap on block devices.

   - Hardware error scrubbing reworks: The original address-range-scrub
     and badblocks tracking solution allowed clearing entries at the
     individual namespace level, but it failed to clear the internal
     list of media errors maintained at the bus level. The result was
     that the next scrub or namespace disable/re-enable event would
     restore the cleared badblocks, but now that is fixed. The v4.8
     kernel introduced an auto-scrub-on-machine-check behavior to
     repopulate the badblocks list. Now, in v4.9, the auto-scrub
     behavior can be disabled so that only the error reported in the
     machine-check is added to the list.

   - DIMM health-event notification support: ACPI 6.1 defines a
     notification event code that can be sent to ACPI NVDIMM devices. A
     poll(2) capable file descriptor for these events can be obtained
     from the nmemX/nfit/flags sysfs-attribute of a libnvdimm memory
     device.

   - Miscellaneous fixes: NVDIMM-N probe error, device-dax build error,
     and a change to dedup the flush hint list to not flush the memory
     controller more than necessary"

* tag 'libnvdimm-for-4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (39 commits)
  /dev/dax: fix Kconfig dependency build breakage
  dax: use correct dev_t value
  dax: convert devm_create_dax_dev to PTR_ERR
  libnvdimm, namespace: allow creation of multiple pmem-namespaces per region
  libnvdimm, namespace: lift single pmem limit in scan_labels()
  libnvdimm, namespace: filter out of range labels in scan_labels()
  libnvdimm, namespace: enable allocation of multiple pmem namespaces
  libnvdimm, namespace: update label implementation for multi-pmem
  libnvdimm, namespace: expand pmem device naming scheme for multi-pmem
  libnvdimm, region: update nd_region_available_dpa() for multi-pmem support
  libnvdimm, namespace: sort namespaces by dpa at init
  libnvdimm, namespace: allow multiple pmem-namespaces per region at scan time
  tools/testing/nvdimm: support for sub-dividing a pmem region
  libnvdimm, namespace: unify blk and pmem label scanning
  libnvdimm, namespace: refactor uuid_show() into a namespace_to_uuid() helper
  libnvdimm, label: convert label tracking to a linked list
  libnvdimm, region: move region-mapping input-paramters to nd_mapping_desc
  nvdimm: reduce duplicated wpq flushes
  libnvdimm: clear the internal poison_list when clearing badblocks
  pmem: reduce kmap_atomic sections to the memcpys only
  ...
parents f29135b5 e476f944
......@@ -886,6 +886,58 @@ static ssize_t revision_show(struct device *dev,
}
static DEVICE_ATTR_RO(revision);
/*
 * sysfs 'show' handler for the 'hw_error_scrub' attribute.
 *
 * Resolves the acpi_nfit_desc behind @dev and prints its current
 * scrub_mode as a decimal number followed by a newline into @buf.
 * Returns the number of characters written.
 */
static ssize_t hw_error_scrub_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);

	/* scrub_mode is declared 'unsigned int', so format it with %u */
	return sprintf(buf, "%u\n", acpi_desc->scrub_mode);
}
/*
 * sysfs 'store' handler for the 'hw_error_scrub' attribute.
 *
 * Accepted values:
 * '0': Switch to the default mode where an exception will only insert
 *      the address of the memory error into the poison and badblocks lists.
 * '1': Enable a full scrub to happen if an exception for a memory error is
 *      received.
 *
 * Returns @size on success, the kstrtol() error if @buf is not a number,
 * or -EINVAL if the number is not one of the modes above.  If the device
 * has no driver data the write is accepted and has no effect.
 */
static ssize_t hw_error_scrub_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t size)
{
	struct nvdimm_bus_descriptor *nd_desc;
	ssize_t status;
	long mode;

	status = kstrtol(buf, 0, &mode);
	if (status)
		return status;

	/* driver data is only looked at while holding the device lock */
	device_lock(dev);
	nd_desc = dev_get_drvdata(dev);
	if (nd_desc) {
		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);

		if (mode == HW_ERROR_SCRUB_ON)
			acpi_desc->scrub_mode = HW_ERROR_SCRUB_ON;
		else if (mode == HW_ERROR_SCRUB_OFF)
			acpi_desc->scrub_mode = HW_ERROR_SCRUB_OFF;
		else
			status = -EINVAL;
	}
	device_unlock(dev);

	return status ? status : (ssize_t)size;
}
static DEVICE_ATTR_RW(hw_error_scrub);
/*
* This shows the number of full Address Range Scrubs that have been
* completed since driver load time. Userspace can wait on this using
......@@ -958,6 +1010,7 @@ static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
static struct attribute *acpi_nfit_attributes[] = {
&dev_attr_revision.attr,
&dev_attr_scrub.attr,
&dev_attr_hw_error_scrub.attr,
NULL,
};
......@@ -1256,6 +1309,44 @@ static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
return NULL;
}
/*
 * Handle an ACPI notification aimed at a single NVDIMM device.
 *
 * Only NFIT_NOTIFY_DIMM_HEALTH is acted upon; any other event is logged
 * at debug level and dropped.  For a health event, anyone polling the
 * dimm's cached 'flags' sysfs dirent is woken via sysfs_notify_dirent().
 *
 * The locked wrapper acpi_nvdimm_notify() calls this with the parent
 * device lock held.
 */
void __acpi_nvdimm_notify(struct device *dev, u32 event)
{
	struct nfit_mem *mem;

	dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__,
			event);

	if (event != NFIT_NOTIFY_DIMM_HEALTH) {
		dev_dbg(dev->parent, "%s: unknown event: %d\n", dev_name(dev),
				event);
		return;
	}

	/* Without driver data on the parent there is nothing to signal. */
	if (!dev_get_drvdata(dev->parent))
		return;

	/*
	 * If we successfully retrieved acpi_desc, then we know nfit_mem data
	 * is still valid.
	 */
	mem = dev_get_drvdata(dev);
	if (!mem || !mem->flags_attr)
		return;

	sysfs_notify_dirent(mem->flags_attr);
}
EXPORT_SYMBOL_GPL(__acpi_nvdimm_notify);
/*
 * ACPI notify callback installed on each dimm's ACPI handle.
 *
 * @data is the dimm's struct acpi_device (passed at install time).  The
 * event is forwarded to __acpi_nvdimm_notify() while holding the lock of
 * the dimm device's parent.
 */
static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
{
	struct acpi_device *dimm_adev = data;
	struct device *dimm_dev = &dimm_adev->dev;

	device_lock(dimm_dev->parent);
	__acpi_nvdimm_notify(dimm_dev, event);
	device_unlock(dimm_dev->parent);
}
static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
struct nfit_mem *nfit_mem, u32 device_handle)
{
......@@ -1280,6 +1371,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
return force_enable_dimms ? 0 : -ENODEV;
}
if (ACPI_FAILURE(acpi_install_notify_handler(adev_dimm->handle,
ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify, adev_dimm))) {
dev_err(dev, "%s: notification registration failed\n",
dev_name(&adev_dimm->dev));
return -ENXIO;
}
/*
* Until standardization materializes we need to consider 4
* different command sets. Note, that checking for function0 (bit0)
......@@ -1318,18 +1416,41 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
return 0;
}
/*
 * devm teardown action: stop dimm event notifications.
 *
 * @data is the struct acpi_nfit_desc whose dimms are walked.  Under
 * init_mutex, every dimm drops its cached 'flags' sysfs dirent reference
 * and, if it has an ACPI companion, has acpi_nvdimm_notify() removed from
 * that companion's handle.
 */
static void shutdown_dimm_notify(void *data)
{
	struct acpi_nfit_desc *acpi_desc = data;
	struct nfit_mem *mem;

	mutex_lock(&acpi_desc->init_mutex);

	list_for_each_entry(mem, &acpi_desc->dimms, list) {
		/* release the dirent so no further sysfs notifications fire */
		if (mem->flags_attr) {
			sysfs_put(mem->flags_attr);
			mem->flags_attr = NULL;
		}

		if (!mem->adev)
			continue;
		acpi_remove_notify_handler(mem->adev->handle,
				ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
	}

	mutex_unlock(&acpi_desc->init_mutex);
}
static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
{
struct nfit_mem *nfit_mem;
int dimm_count = 0;
int dimm_count = 0, rc;
struct nvdimm *nvdimm;
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
struct acpi_nfit_flush_address *flush;
unsigned long flags = 0, cmd_mask;
struct nvdimm *nvdimm;
u32 device_handle;
u16 mem_flags;
int rc;
device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
......@@ -1382,7 +1503,30 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
}
return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
rc = nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
if (rc)
return rc;
/*
* Now that dimms are successfully registered, and async registration
* is flushed, attempt to enable event notification.
*/
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
struct kernfs_node *nfit_kernfs;
nvdimm = nfit_mem->nvdimm;
nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
if (nfit_kernfs)
nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
"flags");
sysfs_put(nfit_kernfs);
if (!nfit_mem->flags_attr)
dev_warn(acpi_desc->dev, "%s: notifications disabled\n",
nvdimm_name(nvdimm));
}
return devm_add_action_or_reset(acpi_desc->dev, shutdown_dimm_notify,
acpi_desc);
}
static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
......@@ -1491,9 +1635,9 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
if (!info)
return -ENOMEM;
for (i = 0; i < nr; i++) {
struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
struct nfit_set_info_map *map = &info->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
struct nvdimm *nvdimm = mapping->nvdimm;
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
spa->range_index, i);
......@@ -1917,7 +2061,7 @@ static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
}
static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
struct nd_mapping_desc *mapping, struct nd_region_desc *ndr_desc,
struct acpi_nfit_memory_map *memdev,
struct nfit_spa *nfit_spa)
{
......@@ -1934,12 +2078,12 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
return -ENODEV;
}
nd_mapping->nvdimm = nvdimm;
mapping->nvdimm = nvdimm;
switch (nfit_spa_type(spa)) {
case NFIT_SPA_PM:
case NFIT_SPA_VOLATILE:
nd_mapping->start = memdev->address;
nd_mapping->size = memdev->region_size;
mapping->start = memdev->address;
mapping->size = memdev->region_size;
break;
case NFIT_SPA_DCR:
nfit_mem = nvdimm_provider_data(nvdimm);
......@@ -1947,13 +2091,13 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
spa->range_index, nvdimm_name(nvdimm));
} else {
nd_mapping->size = nfit_mem->bdw->capacity;
nd_mapping->start = nfit_mem->bdw->start_address;
mapping->size = nfit_mem->bdw->capacity;
mapping->start = nfit_mem->bdw->start_address;
ndr_desc->num_lanes = nfit_mem->bdw->windows;
blk_valid = 1;
}
ndr_desc->nd_mapping = nd_mapping;
ndr_desc->mapping = mapping;
ndr_desc->num_mappings = blk_valid;
ndbr_desc = to_blk_region_desc(ndr_desc);
ndbr_desc->enable = acpi_nfit_blk_region_enable;
......@@ -1979,7 +2123,7 @@ static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
struct nfit_spa *nfit_spa)
{
static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
static struct nd_mapping_desc mappings[ND_MAX_MAPPINGS];
struct acpi_nfit_system_address *spa = nfit_spa->spa;
struct nd_blk_region_desc ndbr_desc;
struct nd_region_desc *ndr_desc;
......@@ -1998,7 +2142,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
}
memset(&res, 0, sizeof(res));
memset(&nd_mappings, 0, sizeof(nd_mappings));
memset(&mappings, 0, sizeof(mappings));
memset(&ndbr_desc, 0, sizeof(ndbr_desc));
res.start = spa->address;
res.end = res.start + spa->length - 1;
......@@ -2014,7 +2158,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
struct nd_mapping *nd_mapping;
struct nd_mapping_desc *mapping;
if (memdev->range_index != spa->range_index)
continue;
......@@ -2023,14 +2167,14 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
spa->range_index, ND_MAX_MAPPINGS);
return -ENXIO;
}
nd_mapping = &nd_mappings[count++];
rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
mapping = &mappings[count++];
rc = acpi_nfit_init_mapping(acpi_desc, mapping, ndr_desc,
memdev, nfit_spa);
if (rc)
goto out;
}
ndr_desc->nd_mapping = nd_mappings;
ndr_desc->mapping = mappings;
ndr_desc->num_mappings = count;
rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
if (rc)
......@@ -2678,29 +2822,30 @@ static int acpi_nfit_remove(struct acpi_device *adev)
return 0;
}
static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
{
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
struct device *dev = &adev->dev;
union acpi_object *obj;
acpi_status status;
int ret;
dev_dbg(dev, "%s: event: %d\n", __func__, event);
device_lock(dev);
if (event != NFIT_NOTIFY_UPDATE)
return;
if (!dev->driver) {
/* dev->driver may be null if we're being removed */
dev_dbg(dev, "%s: no driver found for dev\n", __func__);
goto out_unlock;
return;
}
if (!acpi_desc) {
acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
if (!acpi_desc)
goto out_unlock;
acpi_nfit_desc_init(acpi_desc, &adev->dev);
return;
acpi_nfit_desc_init(acpi_desc, dev);
} else {
/*
* Finish previous registration before considering new
......@@ -2710,10 +2855,10 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
}
/* Evaluate _FIT */
status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
status = acpi_evaluate_object(handle, "_FIT", NULL, &buf);
if (ACPI_FAILURE(status)) {
dev_err(dev, "failed to evaluate _FIT\n");
goto out_unlock;
return;
}
obj = buf.pointer;
......@@ -2725,9 +2870,14 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
} else
dev_err(dev, "Invalid _FIT\n");
kfree(buf.pointer);
}
EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
out_unlock:
device_unlock(dev);
/*
 * ACPI driver notify entry point for the NFIT root device: take the
 * device lock and hand the event to __acpi_nfit_notify().
 */
static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
{
	struct device *dev = &adev->dev;

	device_lock(dev);
	__acpi_nfit_notify(dev, adev->handle, event);
	device_unlock(dev);
}
static const struct acpi_device_id acpi_nfit_ids[] = {
......
......@@ -14,6 +14,7 @@
*/
#include <linux/notifier.h>
#include <linux/acpi.h>
#include <linux/nd.h>
#include <asm/mce.h>
#include "nfit.h"
......@@ -62,13 +63,26 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
}
mutex_unlock(&acpi_desc->init_mutex);
if (!found_match)
continue;
/* If this fails due to an -ENOMEM, there is little we can do */
nvdimm_bus_add_poison(acpi_desc->nvdimm_bus,
ALIGN(mce->addr, L1_CACHE_BYTES),
L1_CACHE_BYTES);
nvdimm_region_notify(nfit_spa->nd_region,
NVDIMM_REVALIDATE_POISON);
if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
/*
* We can ignore an -EBUSY here because if an ARS is already
* in progress, just let that be the last authoritative one
* We can ignore an -EBUSY here because if an ARS is
* already in progress, just let that be the last
* authoritative one
*/
if (found_match)
acpi_nfit_ars_rescan(acpi_desc);
}
break;
}
mutex_unlock(&acpi_desc_lock);
return NOTIFY_DONE;
......
......@@ -78,6 +78,14 @@ enum {
NFIT_ARS_TIMEOUT = 90,
};
/*
 * Notification event codes for the NFIT root device.
 * NFIT_NOTIFY_UPDATE is the only event __acpi_nfit_notify() acts on.
 */
enum nfit_root_notifiers {
NFIT_NOTIFY_UPDATE = 0x80,
};
/*
 * Notification event codes for individual NVDIMM devices.
 * NFIT_NOTIFY_DIMM_HEALTH is the only event __acpi_nvdimm_notify() handles.
 */
enum nfit_dimm_notifiers {
NFIT_NOTIFY_DIMM_HEALTH = 0x81,
};
struct nfit_spa {
struct list_head list;
struct nd_region *nd_region;
......@@ -124,6 +132,7 @@ struct nfit_mem {
struct acpi_nfit_system_address *spa_bdw;
struct acpi_nfit_interleave *idt_dcr;
struct acpi_nfit_interleave *idt_bdw;
struct kernfs_node *flags_attr;
struct nfit_flush *nfit_flush;
struct list_head list;
struct acpi_device *adev;
......@@ -152,6 +161,7 @@ struct acpi_nfit_desc {
struct list_head list;
struct kernfs_node *scrub_count_state;
unsigned int scrub_count;
unsigned int scrub_mode;
unsigned int cancel:1;
unsigned long dimm_cmd_force_en;
unsigned long bus_cmd_force_en;
......@@ -159,6 +169,11 @@ struct acpi_nfit_desc {
void *iobuf, u64 len, int rw);
};
/*
 * Values of acpi_nfit_desc.scrub_mode, as written through the
 * 'hw_error_scrub' sysfs attribute ('0' and '1' respectively).
 */
enum scrub_mode {
/* memory-error exception only records the failing address */
HW_ERROR_SCRUB_OFF,
/* memory-error exception additionally triggers a full scrub */
HW_ERROR_SCRUB_ON,
};
enum nd_blk_mmio_selector {
BDW,
DCR,
......@@ -223,5 +238,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
const u8 *to_nfit_uuid(enum nfit_uuids id);
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
void __acpi_nvdimm_notify(struct device *dev, u32 event);
void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
#endif /* __NFIT_H__ */
......@@ -23,4 +23,9 @@ config DEV_DAX_PMEM
Say Y if unsure
config NR_DEV_DAX
int "Maximum number of Device-DAX instances"
default 32768
range 256 2147483647
endif
......@@ -13,15 +13,25 @@
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/mount.h>
#include <linux/pfn_t.h>
#include <linux/hash.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include "dax.h"
static int dax_major;
static dev_t dax_devt;
static struct class *dax_class;
static DEFINE_IDA(dax_minor_ida);
static int nr_dax = CONFIG_NR_DEV_DAX;
module_param(nr_dax, int, S_IRUGO);
static struct vfsmount *dax_mnt;
static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly;
MODULE_PARM_DESC(nr_dax, "max number of device-dax instances");
/**
* struct dax_region - mapping infrastructure for dax devices
......@@ -48,7 +58,7 @@ struct dax_region {
* struct dax_dev - subdivision of a dax region
* @region - parent region
* @dev - device backing the character device
* @kref - enable this data to be tracked in filp->private_data
* @cdev - core chardev data
* @alive - !alive + rcu grace period == no new mappings can be established
* @id - child id in the region
* @num_resources - number of physical address extents in this device
......@@ -56,50 +66,151 @@ struct dax_region {
*/
struct dax_dev {
struct dax_region *region;
struct device *dev;
struct kref kref;
struct inode *inode;
struct device dev;
struct cdev cdev;
bool alive;
int id;
int num_resources;
struct resource res[0];
};
static void dax_region_free(struct kref *kref)
static struct inode *dax_alloc_inode(struct super_block *sb)
{
struct dax_region *dax_region;
return kmem_cache_alloc(dax_cache, GFP_KERNEL);
}
dax_region = container_of(kref, struct dax_region, kref);
kfree(dax_region);
/*
 * RCU callback queued by dax_destroy_inode(): hand the inode that embeds
 * @head back to dax_cache.
 */
static void dax_i_callback(struct rcu_head *head)
{
	kmem_cache_free(dax_cache, container_of(head, struct inode, i_rcu));
}
void dax_region_put(struct dax_region *dax_region)
static void dax_destroy_inode(struct inode *inode)
{
kref_put(&dax_region->kref, dax_region_free);
call_rcu(&inode->i_rcu, dax_i_callback);
}
EXPORT_SYMBOL_GPL(dax_region_put);
static void dax_dev_free(struct kref *kref)
/*
 * Superblock operations handed to mount_pseudo() by dax_mount().
 * Inodes are allocated and destroyed through the dax-specific
 * dax_alloc_inode() / dax_destroy_inode() helpers.
 */
static const struct super_operations dax_sops = {
.statfs = simple_statfs,
.alloc_inode = dax_alloc_inode,
.destroy_inode = dax_destroy_inode,
.drop_inode = generic_delete_inode,
};
static struct dentry *dax_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
struct dax_dev *dax_dev;
return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
}
dax_dev = container_of(kref, struct dax_dev, kref);
dax_region_put(dax_dev->region);
kfree(dax_dev);
/*
 * The "dax" filesystem type; registered and kern_mount()ed by
 * dax_inode_init(), unregistered by dax_inode_exit().
 */
static struct file_system_type dax_type = {
.name = "dax",
.mount = dax_mount,
.kill_sb = kill_anon_super,
};
/*
 * iget5_locked() match callback: an inode matches when its i_cdev is the
 * cdev passed as @data.
 */
static int dax_test(struct inode *inode, void *data)
{
	struct cdev *wanted = data;

	return inode->i_cdev == wanted;
}
/*
 * iget5_locked() init callback: bind the cdev passed as @data to a newly
 * allocated inode.  Always returns 0.
 */
static int dax_set(struct inode *inode, void *data)
{
	struct cdev *cdev = data;

	inode->i_cdev = cdev;
	return 0;
}
/*
 * Look up, or create, the inode on the dax superblock that is keyed by
 * @cdev.
 *
 * The hash is derived from @devt and DAXFS_MAGIC; dax_test()/dax_set()
 * match and bind on the cdev pointer.  A freshly created inode is set up
 * as an S_DAX character device with a GFP_USER mapping before it is
 * unlocked.  Returns NULL if iget5_locked() fails.
 */
static struct inode *dax_inode_get(struct cdev *cdev, dev_t devt)
{
	unsigned long hashval = hash_32(devt + DAXFS_MAGIC, 31);
	struct inode *ino;

	ino = iget5_locked(dax_superblock, hashval, dax_test, dax_set, cdev);
	if (!ino)
		return NULL;

	/* an existing inode is already initialized, hand it back as-is */
	if (!(ino->i_state & I_NEW))
		return ino;

	ino->i_mode = S_IFCHR;
	ino->i_flags = S_DAX;
	ino->i_rdev = devt;
	mapping_set_gfp_mask(&ino->i_data, GFP_USER);
	unlock_new_inode(ino);

	return ino;
}
/* Slab constructor for dax_cache: one-time initialization of an inode. */
static void init_once(void *inode)
{
	struct inode *ino = inode;

	inode_init_once(ino);
}
/*
 * Set up the inode infrastructure for device-dax: create the inode slab
 * cache, register the "dax" filesystem and mount it internally, then
 * remember its superblock in dax_superblock.
 *
 * Returns 0 on success or a negative errno; on failure everything set up
 * so far is torn down again in reverse order.
 */
static int dax_inode_init(void)
{
	int rc;

	dax_cache = kmem_cache_create("dax_cache", sizeof(struct inode), 0,
			(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
			 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
			init_once);
	if (!dax_cache)
		return -ENOMEM;

	rc = register_filesystem(&dax_type);
	if (rc)
		goto out_destroy_cache;

	dax_mnt = kern_mount(&dax_type);
	if (IS_ERR(dax_mnt)) {
		rc = PTR_ERR(dax_mnt);
		goto out_unregister_fs;
	}

	dax_superblock = dax_mnt->mnt_sb;
	return 0;

out_unregister_fs:
	unregister_filesystem(&dax_type);
out_destroy_cache:
	kmem_cache_destroy(dax_cache);
	return rc;
}
/*
 * Undo dax_inode_init() in reverse order: drop the internal mount,
 * unregister the "dax" filesystem, then destroy the inode slab cache.
 */
static void dax_inode_exit(void)
{
kern_unmount(dax_mnt);
unregister_filesystem(&dax_type);
kmem_cache_destroy(dax_cache);
}
static void dax_dev_put(struct dax_dev *dax_dev)
static void dax_region_free(struct kref *kref)
{
kref_put(&dax_dev->kref, dax_dev_free);
struct dax_region *dax_region;
dax_region = container_of(kref, struct dax_region, kref);
kfree(dax_region);
}
/*
 * Drop one reference on @dax_region; dax_region_free() runs when the
 * last reference goes away.
 */
void dax_region_put(struct dax_region *dax_region)
{
kref_put(&dax_region->kref, dax_region_free);
}
EXPORT_SYMBOL_GPL(dax_region_put);
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, unsigned int align, void *addr,
unsigned long pfn_flags)
{
struct dax_region *dax_region;
dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
if (!IS_ALIGNED(res->start, align)
|| !IS_ALIGNED(resource_size(res), align))
return NULL;
dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
if (!dax_region)
return NULL;
......@@ -116,10 +227,15 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
}
EXPORT_SYMBOL_GPL(alloc_dax_region);
/* Convert the struct device embedded in a dax_dev back to its container. */
static struct dax_dev *to_dax_dev(struct device *dev)
{
return container_of(dev, struct dax_dev, dev);
}
static ssize_t size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct dax_dev *dax_dev = dev_get_drvdata(dev);
struct dax_dev *dax_dev = to_dax_dev(dev);
unsigned long long size = 0;
int i;
......@@ -144,180 +260,11 @@ static const struct attribute_group *dax_attribute_groups[] = {
NULL,
};
static void unregister_dax_dev(void *_dev)
{
struct device *dev = _dev;
struct dax_dev *dax_dev = dev_get_drvdata(dev);
struct dax_region *dax_region = dax_dev->region;
dev_dbg(dev, "%s\n", __func__);
/*
* Note, rcu is not protecting the liveness of dax_dev, rcu is
* ensuring that any fault handlers that might have seen
* dax_dev->alive == true, have completed. Any fault handlers
* that start after synchronize_rcu() has started will abort
* upon seeing dax_dev->alive == false.
*/
dax_dev->alive = false;
synchronize_rcu();
get_device(dev);
device_unregister(dev);
ida_simple_remove(&dax_region->ida, dax_dev->id);
ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
put_device(dev);
dax_dev_put(dax_dev);
}
int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
int count)
{
struct device *parent = dax_region->dev;
struct dax_dev *dax_dev;
struct device *dev;
int rc, minor;
dev_t dev_t;
dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
if (!dax_dev)
return -ENOMEM;
memcpy(dax_dev->res, res, sizeof(*res) * count);
dax_dev->num_resources = count;
kref_init(&dax_dev->kref);
dax_dev->alive = true;
dax_dev->region = dax_region;
kref_get(&dax_region->kref);
dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
if (dax_dev->id < 0) {
rc = dax_dev->id;
goto err_id;
}
minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
if (minor < 0) {
rc = minor;
goto err_minor;
}
dev_t = MKDEV(dax_major, minor);
dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev,
dax_attribute_groups, "dax%d.%d", dax_region->id,
dax_dev->id);
if (IS_ERR(dev)) {
rc = PTR_ERR(dev);
goto err_create;
}
dax_dev->dev = dev;
rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
if (rc)
return rc;
return 0;
err_create:
ida_simple_remove(&dax_minor_ida, minor);
err_minor:
ida_simple_remove(&dax_region->ida, dax_dev->id);
err_id:
dax_dev_put(dax_dev);
return rc;
}
EXPORT_SYMBOL_GPL(devm_create_dax_dev);
/* return an unmapped area aligned to the dax region specified alignment */
static unsigned long dax_dev_get_unmapped_area(struct file *filp,
unsigned long addr, unsigned long len, unsigned long pgoff,
unsigned long flags)
{
unsigned long off, off_end, off_align, len_align, addr_align, align;
struct dax_dev *dax_dev = filp ? filp->private_data : NULL;
struct dax_region *dax_region;
if (!dax_dev || addr)
goto out;
dax_region = dax_dev->region;
align = dax_region->align;
off = pgoff << PAGE_SHIFT;
off_end = off + len;
off_align = round_up(off, align);
if ((off_end <= off_align) || ((off_end - off_align) < align))
goto out;
len_align = len + align;
if ((off + len_align) < off)
goto out;
addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
pgoff, flags);
if (!IS_ERR_VALUE(addr_align)) {
addr_align += (off - addr_align) & (align - 1);
return addr_align;
}
out:
return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
static int __match_devt(struct device *dev, const void *data)
{
const dev_t *devt = data;
return dev->devt == *devt;
}
static struct device *dax_dev_find(dev_t dev_t)
{
return class_find_device(dax_class, NULL, &dev_t, __match_devt);
}
static int dax_dev_open(struct inode *inode, struct file *filp)
{
struct dax_dev *dax_dev = NULL;
struct device *dev;
dev = dax_dev_find(inode->i_rdev);
if (!dev)
return -ENXIO;
device_lock(dev);
dax_dev = dev_get_drvdata(dev);
if (dax_dev) {
dev_dbg(dev, "%s\n", __func__);
filp->private_data = dax_dev;
kref_get(&dax_dev->kref);
inode->i_flags = S_DAX;
}
device_unlock(dev);
if (!dax_dev) {
put_device(dev);
return -ENXIO;
}
return 0;
}
static int dax_dev_release(struct inode *inode, struct file *filp)
{
struct dax_dev *dax_dev = filp->private_data;
struct device *dev = dax_dev->dev;
dev_dbg(dax_dev->dev, "%s\n", __func__);
dax_dev_put(dax_dev);
put_device(dev);
return 0;
}
static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma,
const char *func)
{
struct dax_region *dax_region = dax_dev->region;
struct device *dev = dax_dev->dev;
struct device *dev = &dax_dev->dev;
unsigned long mask;
if (!dax_dev->alive)
......@@ -382,7 +329,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
struct vm_fault *vmf)
{
unsigned long vaddr = (unsigned long) vmf->virtual_address;
struct device *dev = dax_dev->dev;
struct device *dev = &dax_dev->dev;
struct dax_region *dax_region;
int rc = VM_FAULT_SIGBUS;
phys_addr_t phys;
......@@ -422,7 +369,7 @@ static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct file *filp = vma->vm_file;
struct dax_dev *dax_dev = filp->private_data;
dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
current->comm, (vmf->flags & FAULT_FLAG_WRITE)
? "write" : "read", vma->vm_start, vma->vm_end);
rcu_read_lock();
......@@ -437,7 +384,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
unsigned int flags)
{
unsigned long pmd_addr = addr & PMD_MASK;
struct device *dev = dax_dev->dev;
struct device *dev = &dax_dev->dev;
struct dax_region *dax_region;
phys_addr_t phys;
pgoff_t pgoff;
......@@ -479,7 +426,7 @@ static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
struct file *filp = vma->vm_file;
struct dax_dev *dax_dev = filp->private_data;
dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
current->comm, (flags & FAULT_FLAG_WRITE)
? "write" : "read", vma->vm_start, vma->vm_end);
......@@ -490,81 +437,257 @@ static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
return rc;
}
static void dax_dev_vm_open(struct vm_area_struct *vma)
{
struct file *filp = vma->vm_file;
struct dax_dev *dax_dev = filp->private_data;
dev_dbg(dax_dev->dev, "%s\n", __func__);
kref_get(&dax_dev->kref);
}
static void dax_dev_vm_close(struct vm_area_struct *vma)
{
struct file *filp = vma->vm_file;
struct dax_dev *dax_dev = filp->private_data;
dev_dbg(dax_dev->dev, "%s\n", __func__);
dax_dev_put(dax_dev);
}
static const struct vm_operations_struct dax_dev_vm_ops = {
.fault = dax_dev_fault,
.pmd_fault = dax_dev_pmd_fault,
.open = dax_dev_vm_open,
.close = dax_dev_vm_close,
};
static int dax_dev_mmap(struct file *filp, struct vm_area_struct *vma)
static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct dax_dev *dax_dev = filp->private_data;
int rc;
dev_dbg(dax_dev->dev, "%s\n", __func__);
dev_dbg(&dax_dev->dev, "%s\n", __func__);
rc = check_vma(dax_dev, vma, __func__);
if (rc)
return rc;
kref_get(&dax_dev->kref);
vma->vm_ops = &dax_dev_vm_ops;
vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
return 0;
}
/*
 * Return an unmapped area aligned to the dax region specified alignment.
 *
 * When no dax_dev is attached to @filp, when the caller passed an address
 * hint, or when the request cannot hold at least one aligned chunk, this
 * falls back to the mm's regular get_unmapped_area() with the original
 * arguments.
 */
static unsigned long dax_get_unmapped_area(struct file *filp,
unsigned long addr, unsigned long len, unsigned long pgoff,
unsigned long flags)
{
unsigned long off, off_end, off_align, len_align, addr_align, align;
struct dax_dev *dax_dev = filp ? filp->private_data : NULL;
struct dax_region *dax_region;
if (!dax_dev || addr)
goto out;
dax_region = dax_dev->region;
align = dax_region->align;
/* byte range [off, off_end) of the device covered by this request */
off = pgoff << PAGE_SHIFT;
off_end = off + len;
off_align = round_up(off, align);
/* bail unless at least one full 'align'-sized chunk fits in the range */
if ((off_end <= off_align) || ((off_end - off_align) < align))
goto out;
/* over-allocate by 'align' so the result can be shifted; reject overflow */
len_align = len + align;
if ((off + len_align) < off)
goto out;
addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
pgoff, flags);
if (!IS_ERR_VALUE(addr_align)) {
/* advance so the address and 'off' agree modulo 'align' */
addr_align += (off - addr_align) & (align - 1);
return addr_align;
}
out:
return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
/*
 * open() handler for a device-dax character device.
 *
 * Finds the dax_dev that embeds the inode's cdev and redirects both the
 * opened inode and the file to the address_space of the dax_dev's own
 * inode, so every open of the device uses that one mapping.  Always
 * returns 0.
 */
static int dax_open(struct inode *inode, struct file *filp)
{
struct dax_dev *dax_dev;
dax_dev = container_of(inode->i_cdev, struct dax_dev, cdev);
dev_dbg(&dax_dev->dev, "%s\n", __func__);
/* share the dax_dev inode's mapping and make that inode its host */
inode->i_mapping = dax_dev->inode->i_mapping;
inode->i_mapping->host = dax_dev->inode;
filp->f_mapping = inode->i_mapping;
/* the mmap/fault paths read the dax_dev back from private_data */
filp->private_data = dax_dev;
inode->i_flags = S_DAX;
return 0;
}
/*
 * release() handler: nothing to tear down per open file, only emit a
 * debug trace against the dax device.  Always returns 0.
 */
static int dax_release(struct inode *inode, struct file *filp)
{
	struct dax_dev *closing = filp->private_data;

	dev_dbg(&closing->dev, "%s\n", __func__);

	return 0;
}
static const struct file_operations dax_fops = {
.llseek = noop_llseek,
.owner = THIS_MODULE,
.open = dax_dev_open,
.release = dax_dev_release,
.get_unmapped_area = dax_dev_get_unmapped_area,
.mmap = dax_dev_mmap,
.open = dax_open,
.release = dax_release,
.get_unmapped_area = dax_get_unmapped_area,
.mmap = dax_mmap,
};
/*
 * struct device release callback for a dax_dev (installed as dev->release
 * by devm_create_dax_dev()).
 *
 * Returns the region-local id and the minor number to their IDAs, drops
 * the reference held on the parent region, puts the dax inode and finally
 * frees the dax_dev itself.
 */
static void dax_dev_release(struct device *dev)
{
struct dax_dev *dax_dev = to_dax_dev(dev);
struct dax_region *dax_region = dax_dev->region;
ida_simple_remove(&dax_region->ida, dax_dev->id);
ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
dax_region_put(dax_region);
iput(dax_dev->inode);
/* dev is embedded in dax_dev, so it must not be touched after this */
kfree(dax_dev);
}
/*
 * devm action registered by devm_create_dax_dev(): take a dax device out
 * of service.  @dev is the struct device embedded in the dax_dev.
 */
static void unregister_dax_dev(void *dev)
{
struct dax_dev *dax_dev = to_dax_dev(dev);
struct cdev *cdev = &dax_dev->cdev;
dev_dbg(dev, "%s\n", __func__);
/*
* Note, rcu is not protecting the liveness of dax_dev, rcu is
* ensuring that any fault handlers that might have seen
* dax_dev->alive == true, have completed. Any fault handlers
* that start after synchronize_rcu() has started will abort
* upon seeing dax_dev->alive == false.
*/
dax_dev->alive = false;
synchronize_rcu();
/* zap the existing user mappings of the device's shared address_space */
unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
cdev_del(cdev);
device_unregister(dev);
}
/**
 * devm_create_dax_dev() - allocate and register a device-dax instance
 * @dax_region: parent region; supplies the alignment, the per-region id
 *              space and the parent device
 * @res: array of @count ranges backing the new device
 * @count: number of entries in @res
 *
 * Every range must start on, and be sized in multiples of,
 * dax_region->align. On success, teardown via unregister_dax_dev() is
 * registered as a devm action against dax_region->dev.
 *
 * Return: the new dax_dev on success, an ERR_PTR() otherwise.
 */
struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
		struct resource *res, int count)
{
	struct device *parent = dax_region->dev;
	struct dax_dev *dax_dev;
	int rc = 0, minor, i;
	struct device *dev;
	struct cdev *cdev;
	dev_t dev_t;

	dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
	if (!dax_dev)
		return ERR_PTR(-ENOMEM);

	/* copy the ranges, rejecting any that violate the region alignment */
	for (i = 0; i < count; i++) {
		if (!IS_ALIGNED(res[i].start, dax_region->align)
				|| !IS_ALIGNED(resource_size(&res[i]),
					dax_region->align)) {
			rc = -EINVAL;
			break;
		}
		dax_dev->res[i].start = res[i].start;
		dax_dev->res[i].end = res[i].end;
	}

	if (i < count)
		goto err_id;

	dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
	if (dax_dev->id < 0) {
		rc = dax_dev->id;
		goto err_id;
	}

	minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
	if (minor < 0) {
		rc = minor;
		goto err_minor;
	}

	dev_t = MKDEV(MAJOR(dax_devt), minor);
	dev = &dax_dev->dev;
	dax_dev->inode = dax_inode_get(&dax_dev->cdev, dev_t);
	if (!dax_dev->inode) {
		rc = -ENOMEM;
		goto err_inode;
	}

	/* device_initialize() so cdev can reference kobj parent */
	device_initialize(dev);

	cdev = &dax_dev->cdev;
	cdev_init(cdev, &dax_fops);
	cdev->owner = parent->driver->owner;
	cdev->kobj.parent = &dev->kobj;
	rc = cdev_add(&dax_dev->cdev, dev_t, 1);
	if (rc)
		goto err_cdev;

	/* from here on we're committed to teardown via dax_dev_release() */
	dax_dev->num_resources = count;
	dax_dev->alive = true;
	dax_dev->region = dax_region;
	kref_get(&dax_region->kref);

	dev->devt = dev_t;
	dev->class = dax_class;
	dev->parent = parent;
	dev->groups = dax_attribute_groups;
	dev->release = dax_dev_release;
	dev_set_name(dev, "dax%d.%d", dax_region->id, dax_dev->id);
	rc = device_add(dev);
	if (rc) {
		/*
		 * cdev_add() above succeeded, so the char device is live.
		 * Remove it before dropping the device reference; otherwise
		 * the cdev stays registered against a dax_dev that is being
		 * torn down.
		 */
		cdev_del(cdev);
		put_device(dev);
		return ERR_PTR(rc);
	}

	/* on failure the action (unregister_dax_dev) has already been run */
	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
	if (rc)
		return ERR_PTR(rc);

	return dax_dev;

 err_cdev:
	iput(dax_dev->inode);
 err_inode:
	ida_simple_remove(&dax_minor_ida, minor);
 err_minor:
	ida_simple_remove(&dax_region->ida, dax_dev->id);
 err_id:
	kfree(dax_dev);

	return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(devm_create_dax_dev);
/*
 * dax_init() - module init: inode setup, char-device region, device class
 *
 * Resources are acquired in that order and released in reverse on failure.
 * The char devices themselves are added per dax_dev with cdev_add() in
 * devm_create_dax_dev(), so no register_chrdev() major is taken here.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init dax_init(void)
{
	int rc;

	rc = dax_inode_init();
	if (rc)
		return rc;

	nr_dax = max(nr_dax, 256);
	rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
	if (rc)
		goto err_chrdev;

	dax_class = class_create(THIS_MODULE, "dax");
	if (IS_ERR(dax_class)) {
		rc = PTR_ERR(dax_class);
		goto err_class;
	}

	return 0;

 err_class:
	unregister_chrdev_region(dax_devt, nr_dax);
 err_chrdev:
	dax_inode_exit();
	return rc;
}

/*
 * dax_exit() - module exit: undo dax_init() in reverse order and release
 * the minor-number ida.
 */
static void __exit dax_exit(void)
{
	class_destroy(dax_class);
	unregister_chrdev_region(dax_devt, nr_dax);
	ida_destroy(&dax_minor_ida);
	dax_inode_exit();
}
MODULE_AUTHOR("Intel Corporation");
......
......@@ -13,12 +13,13 @@
#ifndef __DAX_H__
#define __DAX_H__
struct device;
struct dax_dev;
struct resource;
struct dax_region;
void dax_region_put(struct dax_region *dax_region);
struct dax_region *alloc_dax_region(struct device *parent,
int region_id, struct resource *res, unsigned int align,
void *addr, unsigned long flags);
/*
 * Create and register a dax_dev backed by @count ranges in @res.
 * Returns the new device or an ERR_PTR(); callers that only need the
 * status can use PTR_ERR_OR_ZERO() on the result.
 */
struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
		struct resource *res, int count);
#endif /* __DAX_H__ */
......@@ -24,7 +24,7 @@ struct dax_pmem {
struct completion cmp;
};
struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
static struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
{
return container_of(ref, struct dax_pmem, ref);
}
......@@ -61,6 +61,7 @@ static int dax_pmem_probe(struct device *dev)
int rc;
void *addr;
struct resource res;
struct dax_dev *dax_dev;
struct nd_pfn_sb *pfn_sb;
struct dax_pmem *dax_pmem;
struct nd_region *nd_region;
......@@ -126,12 +127,12 @@ static int dax_pmem_probe(struct device *dev)
return -ENOMEM;
/* TODO: support for subdividing a dax region... */
rc = devm_create_dax_dev(dax_region, &res, 1);
dax_dev = devm_create_dax_dev(dax_region, &res, 1);
/* child dax_dev instances now own the lifetime of the dax_region */
dax_region_put(dax_region);
return rc;
return PTR_ERR_OR_ZERO(dax_dev);
}
static struct nd_device_driver dax_pmem_driver = {
......
......@@ -89,7 +89,7 @@ config NVDIMM_PFN
Select Y if unsure
config NVDIMM_DAX
bool "NVDIMM DAX: Raw access to persistent memory"
tristate "NVDIMM DAX: Raw access to persistent memory"
default LIBNVDIMM
depends on NVDIMM_PFN
help
......
......@@ -217,6 +217,8 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
return rc;
if (cmd_rc < 0)
return cmd_rc;
nvdimm_clear_from_poison_list(nvdimm_bus, phys, len);
return clear_err.cleared;
}
EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
......
......@@ -547,11 +547,12 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
gfp_t flags)
{
struct nd_poison *pl;
pl = kzalloc(sizeof(*pl), GFP_KERNEL);
pl = kzalloc(sizeof(*pl), flags);
if (!pl)
return -ENOMEM;
......@@ -567,7 +568,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
struct nd_poison *pl;
if (list_empty(&nvdimm_bus->poison_list))
return add_poison(nvdimm_bus, addr, length);
return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
/*
* There is a chance this is a duplicate, check for those first.
......@@ -587,7 +588,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
* as any overlapping ranges will get resolved when the list is consumed
* and converted to badblocks
*/
return add_poison(nvdimm_bus, addr, length);
return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
......@@ -602,6 +603,70 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
/**
 * nvdimm_clear_from_poison_list() - forget a cleared range on the bus
 * @nvdimm_bus: bus whose poison_list is updated
 * @start: first address of the range that was cleared
 * @len: length of the cleared range
 *
 * Walks nvdimm_bus->poison_list under nvdimm_bus_lock() and removes the
 * interval [start, start + len - 1] from every entry it intersects: an
 * entry is deleted, trimmed at its head or tail, or split in two.
 */
void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
		phys_addr_t start, unsigned int len)
{
	struct list_head *poison_list = &nvdimm_bus->poison_list;
	u64 clr_end = start + len - 1;
	struct nd_poison *pl, *next;

	nvdimm_bus_lock(&nvdimm_bus->dev);
	WARN_ON_ONCE(list_empty(poison_list));

	/*
	 * [start, clr_end] is the poison interval being cleared.
	 * [pl->start, pl_end] is the poison_list entry we're comparing
	 * the above interval against. The poison list entry may need
	 * to be modified (update either start or length), deleted, or
	 * split into two based on the overlap characteristics
	 */
	list_for_each_entry_safe(pl, next, poison_list, list) {
		u64 pl_end = pl->start + pl->length - 1;

		/* Skip intervals with no intersection */
		if (pl_end < start)
			continue;
		if (pl->start > clr_end)
			continue;
		/* Delete completely overlapped poison entries */
		if ((pl->start >= start) && (pl_end <= clr_end)) {
			list_del(&pl->list);
			kfree(pl);
			continue;
		}
		/*
		 * Adjust start point of partially cleared entries. The
		 * comparison is inclusive: a clear that ends exactly on
		 * pl->start still removes the entry's first address.
		 */
		if ((start <= pl->start) && (clr_end >= pl->start)) {
			pl->length -= clr_end - pl->start + 1;
			pl->start = clr_end + 1;
			continue;
		}
		/* Adjust pl->length for partial clearing at the tail end */
		if ((pl->start < start) && (pl_end <= clr_end)) {
			/* pl->start remains the same */
			pl->length = start - pl->start;
			continue;
		}
		/*
		 * If clearing in the middle of an entry, we split it into
		 * two by modifying the current entry to represent one half of
		 * the split, and adding a new entry for the second half.
		 */
		if ((pl->start < start) && (pl_end > clr_end)) {
			u64 new_start = clr_end + 1;
			u64 new_len = pl_end - new_start + 1;

			/*
			 * Add new entry covering the right half. If that
			 * fails, leave this entry untouched: keeping a stale
			 * record of the cleared range is safer than dropping
			 * the still-poisoned right half from the list.
			 */
			if (add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO))
				continue;
			/* Adjust this entry to cover the left half */
			pl->length = start - pl->start;
			continue;
		}
	}
	nvdimm_bus_unlock(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_clear_from_poison_list);
#ifdef CONFIG_BLK_DEV_INTEGRITY
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
......
......@@ -26,6 +26,14 @@ static int nvdimm_probe(struct device *dev)
struct nvdimm_drvdata *ndd;
int rc;
rc = nvdimm_check_config_data(dev);
if (rc) {
/* not required for non-aliased nvdimm, ex. NVDIMM-N */
if (rc == -ENOTTY)
rc = 0;
return rc;
}
ndd = kzalloc(sizeof(*ndd), GFP_KERNEL);
if (!ndd)
return -ENOMEM;
......@@ -72,6 +80,9 @@ static int nvdimm_remove(struct device *dev)
{
struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
if (!ndd)
return 0;
nvdimm_bus_lock(dev);
dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev);
......
......@@ -28,28 +28,30 @@ static DEFINE_IDA(dimm_ida);
* Retrieve bus and dimm handle and return if this bus supports
* get_config_data commands
*/
static int __validate_dimm(struct nvdimm_drvdata *ndd)
int nvdimm_check_config_data(struct device *dev)
{
struct nvdimm *nvdimm;
if (!ndd)
return -EINVAL;
nvdimm = to_nvdimm(ndd->dev);
struct nvdimm *nvdimm = to_nvdimm(dev);
if (!nvdimm->cmd_mask)
return -ENXIO;
if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask))
if (!nvdimm->cmd_mask ||
!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
if (nvdimm->flags & NDD_ALIASING)
return -ENXIO;
else
return -ENOTTY;
}
return 0;
}
static int validate_dimm(struct nvdimm_drvdata *ndd)
{
int rc = __validate_dimm(ndd);
int rc;
if (!ndd)
return -EINVAL;
if (rc && ndd)
rc = nvdimm_check_config_data(ndd->dev);
if (rc)
dev_dbg(ndd->dev, "%pf: %s error: %d\n",
__builtin_return_address(0), __func__, rc);
return rc;
......@@ -263,6 +265,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm)
}
EXPORT_SYMBOL_GPL(nvdimm_name);
/* nvdimm_kobj() - return the kobject of the struct device embedded in @nvdimm */
struct kobject *nvdimm_kobj(struct nvdimm *nvdimm)
{
	struct kobject *kobj = &nvdimm->dev.kobj;

	return kobj;
}
EXPORT_SYMBOL_GPL(nvdimm_kobj);
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm)
{
return nvdimm->cmd_mask;
......@@ -378,40 +386,166 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
}
EXPORT_SYMBOL_GPL(nvdimm_create);
/*
 * alias_dpa_busy() - device_for_each_child() callback that accounts for
 * PMEM allocations aliasing a BLK mapping
 * @dev: child device being visited; anything that is not a pmem region
 *       is ignored
 * @data: struct blk_alloc_info describing the BLK mapping of interest
 *
 * Looks for a mapping of the pmem region that sits on the same nvdimm as
 * info->nd_mapping, then probes that dimm's dpa resources to find
 * blk_start (the address just past the last "pmem" allocation inside the
 * mapping) and the amount of other busy space.
 *
 * With info->res set (allocation case) the candidate free range is
 * clipped so it does not begin before blk_start and 1 is returned.
 * Otherwise the consumed space is subtracted from info->available and 0
 * is returned.
 */
int alias_dpa_busy(struct device *dev, void *data)
{
	resource_size_t map_end, blk_start, new, busy;
	struct blk_alloc_info *info = data;
	struct nd_mapping *nd_mapping;
	struct nd_region *nd_region;
	struct nvdimm_drvdata *ndd;
	struct resource *res;
	int i;

	if (!is_nd_pmem(dev))
		return 0;

	/* find this region's mapping on the dimm we are accounting for */
	nd_region = to_nd_region(dev);
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		nd_mapping = &nd_region->mapping[i];
		if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
			break;
	}

	/* this pmem region does not touch the dimm: nothing to account */
	if (i >= nd_region->ndr_mappings)
		return 0;

	ndd = to_ndd(nd_mapping);
	map_end = nd_mapping->start + nd_mapping->size - 1;
	blk_start = nd_mapping->start;

	/*
	 * In the allocation case ->res is set to free space that we are
	 * looking to validate against PMEM aliasing collision rules
	 * (i.e. BLK is allocated after all aliased PMEM).
	 */
	if (info->res) {
		if (info->res->start >= nd_mapping->start
				&& info->res->start < map_end)
			/* pass */;
		else
			return 0;
	}

 retry:
	/*
	 * Find the free dpa from the end of the last pmem allocation to
	 * the end of the interleave-set mapping that is not already
	 * covered by a blk allocation.
	 */
	busy = 0;
	for_each_dpa_resource(ndd, res) {
		if ((res->start >= blk_start && res->start < map_end)
				|| (res->end >= blk_start
					&& res->end <= map_end)) {
			if (strncmp(res->name, "pmem", 4) == 0) {
				/*
				 * a pmem allocation pushes blk_start past its
				 * end; rescan from scratch with the new bound
				 */
				new = max(blk_start, min(map_end + 1,
							res->end + 1));
				if (new != blk_start) {
					blk_start = new;
					goto retry;
				}
			} else
				busy += min(map_end, res->end)
					- max(nd_mapping->start, res->start) + 1;
		} else if (nd_mapping->start > res->start
				&& map_end < res->end) {
			/* total eclipse of the PMEM region mapping */
			busy += nd_mapping->size;
			break;
		}
	}

	/* update the free space range with the probed blk_start */
	if (info->res && blk_start > info->res->start) {
		info->res->start = max(info->res->start, blk_start);
		/* nothing left: collapse the range to zero size */
		if (info->res->start > info->res->end)
			info->res->end = info->res->start - 1;
		return 1;
	}

	info->available -= blk_start - nd_mapping->start + busy;

	return 0;
}
static int blk_dpa_busy(struct device *dev, void *data)
{
struct blk_alloc_info *info = data;
struct nd_mapping *nd_mapping;
struct nd_region *nd_region;
resource_size_t map_end;
int i;
if (!is_nd_pmem(dev))
return 0;
nd_region = to_nd_region(dev);
for (i = 0; i < nd_region->ndr_mappings; i++) {
nd_mapping = &nd_region->mapping[i];
if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
break;
}
if (i >= nd_region->ndr_mappings)
return 0;
map_end = nd_mapping->start + nd_mapping->size - 1;
if (info->res->start >= nd_mapping->start
&& info->res->start < map_end) {
if (info->res->end <= map_end) {
info->busy = 0;
return 1;
} else {
info->busy -= info->res->end - map_end;
return 0;
}
} else if (info->res->end >= nd_mapping->start
&& info->res->end <= map_end) {
info->busy -= nd_mapping->start - info->res->start;
return 0;
} else {
info->busy -= nd_mapping->size;
return 0;
}
}
/**
* nd_blk_available_dpa - account the unused dpa of BLK region
* @nd_mapping: container of dpa-resource-root + labels
*
* Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges.
* Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but
* we arrange for them to never start at an lower dpa than the last
* PMEM allocation in an aliased region.
*/
resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping)
resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
{
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
resource_size_t map_end, busy = 0, available;
struct blk_alloc_info info = {
.nd_mapping = nd_mapping,
.available = nd_mapping->size,
.res = NULL,
};
struct resource *res;
if (!ndd)
return 0;
map_end = nd_mapping->start + nd_mapping->size - 1;
for_each_dpa_resource(ndd, res)
if (res->start >= nd_mapping->start && res->start < map_end) {
resource_size_t end = min(map_end, res->end);
device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
busy += end - res->start + 1;
} else if (res->end >= nd_mapping->start
&& res->end <= map_end) {
busy += res->end - nd_mapping->start;
} else if (nd_mapping->start > res->start
&& nd_mapping->start < res->end) {
/* total eclipse of the BLK region mapping */
busy += nd_mapping->size;
/* now account for busy blk allocations in unaliased dpa */
for_each_dpa_resource(ndd, res) {
if (strncmp(res->name, "blk", 3) != 0)
continue;
info.res = res;
info.busy = resource_size(res);
device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
info.available -= info.busy;
}
available = map_end - nd_mapping->start + 1;
if (busy < available)
return available - busy;
return 0;
return info.available;
}
/**
......@@ -443,21 +577,16 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
map_start = nd_mapping->start;
map_end = map_start + nd_mapping->size - 1;
blk_start = max(map_start, map_end + 1 - *overlap);
for_each_dpa_resource(ndd, res)
for_each_dpa_resource(ndd, res) {
if (res->start >= map_start && res->start < map_end) {
if (strncmp(res->name, "blk", 3) == 0)
blk_start = min(blk_start, res->start);
else if (res->start != map_start) {
blk_start = min(blk_start,
max(map_start, res->start));
else if (res->end > map_end) {
reason = "misaligned to iset";
goto err;
} else {
if (busy) {
reason = "duplicate overlapping PMEM reservations?";
goto err;
}
} else
busy += resource_size(res);
continue;
}
} else if (res->end >= map_start && res->end <= map_end) {
if (strncmp(res->name, "blk", 3) == 0) {
/*
......@@ -466,15 +595,14 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
* be used for BLK.
*/
blk_start = map_start;
} else {
reason = "misaligned to iset";
goto err;
}
} else
busy += resource_size(res);
} else if (map_start > res->start && map_start < res->end) {
/* total eclipse of the mapping */
busy += nd_mapping->size;
blk_start = map_start;
}
}
*overlap = map_end + 1 - blk_start;
available = blk_start - map_start;
......@@ -483,10 +611,6 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
return 0;
err:
/*
* Something is wrong, PMEM must align with the start of the
* interleave set, and there can only be one allocation per set.
*/
nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
return 0;
}
......
......@@ -494,11 +494,13 @@ static int __pmem_label_update(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
int pos)
{
u64 cookie = nd_region_interleave_set_cookie(nd_region), rawsize;
u64 cookie = nd_region_interleave_set_cookie(nd_region);
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_label *victim_label;
struct nd_label_ent *label_ent, *victim = NULL;
struct nd_namespace_label *nd_label;
struct nd_namespace_index *nsindex;
struct nd_label_id label_id;
struct resource *res;
unsigned long *free;
u32 nslot, slot;
size_t offset;
......@@ -507,6 +509,16 @@ static int __pmem_label_update(struct nd_region *nd_region,
if (!preamble_next(ndd, &nsindex, &free, &nslot))
return -ENXIO;
nd_label_gen_id(&label_id, nspm->uuid, 0);
for_each_dpa_resource(ndd, res)
if (strcmp(res->name, label_id.id) == 0)
break;
if (!res) {
WARN_ON_ONCE(1);
return -ENXIO;
}
/* allocate and write the label to the staging (next) index */
slot = nd_label_alloc_slot(ndd);
if (slot == UINT_MAX)
......@@ -522,11 +534,10 @@ static int __pmem_label_update(struct nd_region *nd_region,
nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
nd_label->position = __cpu_to_le16(pos);
nd_label->isetcookie = __cpu_to_le64(cookie);
rawsize = div_u64(resource_size(&nspm->nsio.res),
nd_region->ndr_mappings);
nd_label->rawsize = __cpu_to_le64(rawsize);
nd_label->dpa = __cpu_to_le64(nd_mapping->start);
nd_label->rawsize = __cpu_to_le64(resource_size(res));
nd_label->dpa = __cpu_to_le64(res->start);
nd_label->slot = __cpu_to_le32(slot);
nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
/* update label */
offset = nd_label_offset(ndd, nd_label);
......@@ -536,38 +547,43 @@ static int __pmem_label_update(struct nd_region *nd_region,
return rc;
/* Garbage collect the previous label */
victim_label = nd_mapping->labels[0];
if (victim_label) {
slot = to_slot(ndd, victim_label);
nd_label_free_slot(ndd, slot);
mutex_lock(&nd_mapping->lock);
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
if (!label_ent->label)
continue;
if (memcmp(nspm->uuid, label_ent->label->uuid,
NSLABEL_UUID_LEN) != 0)
continue;
victim = label_ent;
list_move_tail(&victim->list, &nd_mapping->labels);
break;
}
if (victim) {
dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
slot = to_slot(ndd, victim->label);
nd_label_free_slot(ndd, slot);
victim->label = NULL;
}
/* update index */
rc = nd_label_write_index(ndd, ndd->ns_next,
nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
if (rc < 0)
return rc;
nd_mapping->labels[0] = nd_label;
return 0;
}
static void del_label(struct nd_mapping *nd_mapping, int l)
{
struct nd_namespace_label *next_label, *nd_label;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
unsigned int slot;
int j;
nd_label = nd_mapping->labels[l];
slot = to_slot(ndd, nd_label);
dev_vdbg(ndd->dev, "%s: clear: %d\n", __func__, slot);
if (rc == 0) {
list_for_each_entry(label_ent, &nd_mapping->labels, list)
if (!label_ent->label) {
label_ent->label = nd_label;
nd_label = NULL;
break;
}
dev_WARN_ONCE(&nspm->nsio.common.dev, nd_label,
"failed to track label: %d\n",
to_slot(ndd, nd_label));
if (nd_label)
rc = -ENXIO;
}
mutex_unlock(&nd_mapping->lock);
for (j = l; (next_label = nd_mapping->labels[j + 1]); j++)
nd_mapping->labels[j] = next_label;
nd_mapping->labels[j] = NULL;
return rc;
}
static bool is_old_resource(struct resource *res, struct resource **list, int n)
......@@ -607,14 +623,16 @@ static int __blk_label_update(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk,
int num_labels)
{
int i, l, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_label *nd_label;
struct nd_label_ent *label_ent, *e;
struct nd_namespace_index *nsindex;
unsigned long *free, *victim_map = NULL;
struct resource *res, **old_res_list;
struct nd_label_id label_id;
u8 uuid[NSLABEL_UUID_LEN];
LIST_HEAD(list);
u32 nslot, slot;
if (!preamble_next(ndd, &nsindex, &free, &nslot))
......@@ -736,15 +754,22 @@ static int __blk_label_update(struct nd_region *nd_region,
* entries in nd_mapping->labels
*/
nlabel = 0;
for_each_label(l, nd_label, nd_mapping->labels) {
mutex_lock(&nd_mapping->lock);
list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
nd_label = label_ent->label;
if (!nd_label)
continue;
nlabel++;
memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
continue;
nlabel--;
del_label(nd_mapping, l);
l--; /* retry with the new label at this index */
list_move(&label_ent->list, &list);
label_ent->label = NULL;
}
list_splice_tail_init(&list, &nd_mapping->labels);
mutex_unlock(&nd_mapping->lock);
if (nlabel + nsblk->num_resources > num_labels) {
/*
* Bug, we can't end up with more resources than
......@@ -755,6 +780,15 @@ static int __blk_label_update(struct nd_region *nd_region,
goto out;
}
mutex_lock(&nd_mapping->lock);
label_ent = list_first_entry_or_null(&nd_mapping->labels,
typeof(*label_ent), list);
if (!label_ent) {
WARN_ON(1);
mutex_unlock(&nd_mapping->lock);
rc = -ENXIO;
goto out;
}
for_each_clear_bit_le(slot, free, nslot) {
nd_label = nd_label_base(ndd) + slot;
memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
......@@ -762,11 +796,19 @@ static int __blk_label_update(struct nd_region *nd_region,
continue;
res = to_resource(ndd, nd_label);
res->flags &= ~DPA_RESOURCE_ADJUSTED;
dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n",
l, slot);
nd_mapping->labels[l++] = nd_label;
dev_vdbg(&nsblk->common.dev, "assign label slot: %d\n", slot);
list_for_each_entry_from(label_ent, &nd_mapping->labels, list) {
if (label_ent->label)
continue;
label_ent->label = nd_label;
nd_label = NULL;
break;
}
if (nd_label)
dev_WARN(&nsblk->common.dev,
"failed to track label slot%d\n", slot);
}
nd_mapping->labels[l] = NULL;
mutex_unlock(&nd_mapping->lock);
out:
kfree(old_res_list);
......@@ -788,32 +830,28 @@ static int __blk_label_update(struct nd_region *nd_region,
static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
{
int i, l, old_num_labels = 0;
int i, old_num_labels = 0;
struct nd_label_ent *label_ent;
struct nd_namespace_index *nsindex;
struct nd_namespace_label *nd_label;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
size_t size = (num_labels + 1) * sizeof(struct nd_namespace_label *);
for_each_label(l, nd_label, nd_mapping->labels)
mutex_lock(&nd_mapping->lock);
list_for_each_entry(label_ent, &nd_mapping->labels, list)
old_num_labels++;
mutex_unlock(&nd_mapping->lock);
/*
* We need to preserve all the old labels for the mapping so
* they can be garbage collected after writing the new labels.
*/
if (num_labels > old_num_labels) {
struct nd_namespace_label **labels;
labels = krealloc(nd_mapping->labels, size, GFP_KERNEL);
if (!labels)
for (i = old_num_labels; i < num_labels; i++) {
label_ent = kzalloc(sizeof(*label_ent), GFP_KERNEL);
if (!label_ent)
return -ENOMEM;
nd_mapping->labels = labels;
mutex_lock(&nd_mapping->lock);
list_add_tail(&label_ent->list, &nd_mapping->labels);
mutex_unlock(&nd_mapping->lock);
}
if (!nd_mapping->labels)
return -ENOMEM;
for (i = old_num_labels; i <= num_labels; i++)
nd_mapping->labels[i] = NULL;
if (ndd->ns_current == -1 || ndd->ns_next == -1)
/* pass */;
......@@ -837,42 +875,45 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
{
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_label *nd_label;
struct nd_label_ent *label_ent, *e;
struct nd_namespace_index *nsindex;
u8 label_uuid[NSLABEL_UUID_LEN];
int l, num_freed = 0;
unsigned long *free;
LIST_HEAD(list);
u32 nslot, slot;
int active = 0;
if (!uuid)
return 0;
/* no index || no labels == nothing to delete */
if (!preamble_next(ndd, &nsindex, &free, &nslot)
|| !nd_mapping->labels)
if (!preamble_next(ndd, &nsindex, &free, &nslot))
return 0;
for_each_label(l, nd_label, nd_mapping->labels) {
mutex_lock(&nd_mapping->lock);
list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
struct nd_namespace_label *nd_label = label_ent->label;
if (!nd_label)
continue;
active++;
memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0)
continue;
active--;
slot = to_slot(ndd, nd_label);
nd_label_free_slot(ndd, slot);
dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
del_label(nd_mapping, l);
num_freed++;
l--; /* retry with new label at this index */
list_move_tail(&label_ent->list, &list);
label_ent->label = NULL;
}
list_splice_tail_init(&list, &nd_mapping->labels);
if (num_freed > l) {
/*
* num_freed will only ever be > l when we delete the last
* label
*/
kfree(nd_mapping->labels);
nd_mapping->labels = NULL;
dev_dbg(ndd->dev, "%s: no more labels\n", __func__);
if (active == 0) {
nd_mapping_free_labels(nd_mapping);
dev_dbg(ndd->dev, "%s: no more active labels\n", __func__);
}
mutex_unlock(&nd_mapping->lock);
return nd_label_write_index(ndd, ndd->ns_next,
nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
......@@ -885,7 +926,9 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region,
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
int rc;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct resource *res;
int rc, count = 0;
if (size == 0) {
rc = del_labels(nd_mapping, nspm->uuid);
......@@ -894,7 +937,12 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region,
continue;
}
rc = init_labels(nd_mapping, 1);
for_each_dpa_resource(ndd, res)
if (strncmp(res->name, "pmem", 3) == 0)
count++;
WARN_ON_ONCE(!count);
rc = init_labels(nd_mapping, count);
if (rc < 0)
return rc;
......
......@@ -12,8 +12,10 @@
*/
#include <linux/module.h>
#include <linux/device.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/pmem.h>
#include <linux/list.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"
......@@ -28,7 +30,10 @@ static void namespace_io_release(struct device *dev)
static void namespace_pmem_release(struct device *dev)
{
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
if (nspm->id >= 0)
ida_simple_remove(&nd_region->ns_ida, nspm->id);
kfree(nspm->alt_name);
kfree(nspm->uuid);
kfree(nspm);
......@@ -62,17 +67,17 @@ static struct device_type namespace_blk_device_type = {
.release = namespace_blk_release,
};
static bool is_namespace_pmem(struct device *dev)
static bool is_namespace_pmem(const struct device *dev)
{
return dev ? dev->type == &namespace_pmem_device_type : false;
}
static bool is_namespace_blk(struct device *dev)
static bool is_namespace_blk(const struct device *dev)
{
return dev ? dev->type == &namespace_blk_device_type : false;
}
static bool is_namespace_io(struct device *dev)
static bool is_namespace_io(const struct device *dev)
{
return dev ? dev->type == &namespace_io_device_type : false;
}
......@@ -168,7 +173,21 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
suffix = "s";
if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) {
sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : "");
int nsidx = 0;
if (is_namespace_pmem(&ndns->dev)) {
struct nd_namespace_pmem *nspm;
nspm = to_nd_namespace_pmem(&ndns->dev);
nsidx = nspm->id;
}
if (nsidx)
sprintf(name, "pmem%d.%d%s", nd_region->id, nsidx,
suffix ? suffix : "");
else
sprintf(name, "pmem%d%s", nd_region->id,
suffix ? suffix : "");
} else if (is_namespace_blk(&ndns->dev)) {
struct nd_namespace_blk *nsblk;
......@@ -294,7 +313,7 @@ static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
if (strcmp(res->name, label_id.id) != 0)
continue;
/*
* Resources with unacknoweldged adjustments indicate a
* Resources with unacknowledged adjustments indicate a
* failure to update labels
*/
if (res->flags & DPA_RESOURCE_ADJUSTED)
......@@ -510,19 +529,68 @@ static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
return rc ? n : 0;
}
static bool space_valid(bool is_pmem, bool is_reserve,
struct nd_label_id *label_id, struct resource *res)
{
/*
* For BLK-space any space is valid, for PMEM-space, it must be
* contiguous with an existing allocation unless we are
* reserving pmem.
/**
* space_valid() - validate free dpa space against constraints
* @nd_region: hosting region of the free space
* @ndd: dimm device data for debug
* @label_id: namespace id to allocate space
* @prev: potential allocation that precedes free space
* @next: allocation that follows the given free space range
* @exist: first allocation with same id in the mapping
* @n: range that must satisfied for pmem allocations
* @valid: free space range to validate
*
* BLK-space is valid as long as it does not precede a PMEM
* allocation in a given region. PMEM-space must be contiguous
* and adjacent to an existing existing allocation (if one
* exists). If reserving PMEM any space is valid.
*/
if (is_reserve || !is_pmem)
return true;
if (!res || strcmp(res->name, label_id->id) == 0)
return true;
return false;
static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
struct nd_label_id *label_id, struct resource *prev,
struct resource *next, struct resource *exist,
resource_size_t n, struct resource *valid)
{
bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
if (valid->start >= valid->end)
goto invalid;
if (is_reserve)
return;
if (!is_pmem) {
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nvdimm_bus *nvdimm_bus;
struct blk_alloc_info info = {
.nd_mapping = nd_mapping,
.available = nd_mapping->size,
.res = valid,
};
WARN_ON(!is_nd_blk(&nd_region->dev));
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
return;
}
/* allocation needs to be contiguous, so this is all or nothing */
if (resource_size(valid) < n)
goto invalid;
/* we've got all the space we need and no existing allocation */
if (!exist)
return;
/* allocation needs to be contiguous with the existing namespace */
if (valid->start == exist->end + 1
|| valid->end == exist->start - 1)
return;
invalid:
/* truncate @valid size to 0 */
valid->end = valid->start - 1;
}
enum alloc_loc {
......@@ -534,18 +602,24 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
resource_size_t n)
{
resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct resource *res, *exist = NULL, valid;
const resource_size_t to_allocate = n;
struct resource *res;
int first;
for_each_dpa_resource(ndd, res)
if (strcmp(label_id->id, res->name) == 0)
exist = res;
valid.start = nd_mapping->start;
valid.end = mapping_end;
valid.name = "free space";
retry:
first = 0;
for_each_dpa_resource(ndd, res) {
resource_size_t allocate, available = 0, free_start, free_end;
struct resource *next = res->sibling, *new_res = NULL;
resource_size_t allocate, available = 0;
enum alloc_loc loc = ALLOC_ERR;
const char *action;
int rc = 0;
......@@ -558,33 +632,36 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
/* space at the beginning of the mapping */
if (!first++ && res->start > nd_mapping->start) {
free_start = nd_mapping->start;
available = res->start - free_start;
if (space_valid(is_pmem, is_reserve, label_id, NULL))
valid.start = nd_mapping->start;
valid.end = res->start - 1;
space_valid(nd_region, ndd, label_id, NULL, next, exist,
to_allocate, &valid);
available = resource_size(&valid);
if (available)
loc = ALLOC_BEFORE;
}
/* space between allocations */
if (!loc && next) {
free_start = res->start + resource_size(res);
free_end = min(mapping_end, next->start - 1);
if (space_valid(is_pmem, is_reserve, label_id, res)
&& free_start < free_end) {
available = free_end + 1 - free_start;
valid.start = res->start + resource_size(res);
valid.end = min(mapping_end, next->start - 1);
space_valid(nd_region, ndd, label_id, res, next, exist,
to_allocate, &valid);
available = resource_size(&valid);
if (available)
loc = ALLOC_MID;
}
}
/* space at the end of the mapping */
if (!loc && !next) {
free_start = res->start + resource_size(res);
free_end = mapping_end;
if (space_valid(is_pmem, is_reserve, label_id, res)
&& free_start < free_end) {
available = free_end + 1 - free_start;
valid.start = res->start + resource_size(res);
valid.end = mapping_end;
space_valid(nd_region, ndd, label_id, res, next, exist,
to_allocate, &valid);
available = resource_size(&valid);
if (available)
loc = ALLOC_AFTER;
}
}
if (!loc || !available)
continue;
......@@ -593,8 +670,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
case ALLOC_BEFORE:
if (strcmp(res->name, label_id->id) == 0) {
/* adjust current resource up */
if (is_pmem && !is_reserve)
return n;
rc = adjust_resource(res, res->start - allocate,
resource_size(res) + allocate);
action = "cur grow up";
......@@ -604,8 +679,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
case ALLOC_MID:
if (strcmp(next->name, label_id->id) == 0) {
/* adjust next resource up */
if (is_pmem && !is_reserve)
return n;
rc = adjust_resource(next, next->start
- allocate, resource_size(next)
+ allocate);
......@@ -629,12 +702,10 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
if (strcmp(action, "allocate") == 0) {
/* BLK allocate bottom up */
if (!is_pmem)
free_start += available - allocate;
else if (!is_reserve && free_start != nd_mapping->start)
return n;
valid.start += available - allocate;
new_res = nvdimm_allocate_dpa(ndd, label_id,
free_start, allocate);
valid.start, allocate);
if (!new_res)
rc = -EBUSY;
} else if (strcmp(action, "grow down") == 0) {
......@@ -832,13 +903,45 @@ static int grow_dpa_allocation(struct nd_region *nd_region,
return 0;
}
static void nd_namespace_pmem_set_size(struct nd_region *nd_region,
static void nd_namespace_pmem_set_resource(struct nd_region *nd_region,
struct nd_namespace_pmem *nspm, resource_size_t size)
{
struct resource *res = &nspm->nsio.res;
resource_size_t offset = 0;
res->start = nd_region->ndr_start;
res->end = nd_region->ndr_start + size - 1;
if (size && !nspm->uuid) {
WARN_ON_ONCE(1);
size = 0;
}
if (size && nspm->uuid) {
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_label_id label_id;
struct resource *res;
if (!ndd) {
size = 0;
goto out;
}
nd_label_gen_id(&label_id, nspm->uuid, 0);
/* calculate a spa offset from the dpa allocation offset */
for_each_dpa_resource(ndd, res)
if (strcmp(res->name, label_id.id) == 0) {
offset = (res->start - nd_mapping->start)
* nd_region->ndr_mappings;
goto out;
}
WARN_ON_ONCE(1);
size = 0;
}
out:
res->start = nd_region->ndr_start + offset;
res->end = res->start + size - 1;
}
static bool uuid_not_set(const u8 *uuid, struct device *dev, const char *where)
......@@ -929,7 +1032,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
nd_namespace_pmem_set_size(nd_region, nspm,
nd_namespace_pmem_set_resource(nd_region, nspm,
val * nd_region->ndr_mappings);
} else if (is_namespace_blk(dev)) {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
......@@ -1031,22 +1134,27 @@ static ssize_t size_show(struct device *dev,
}
static DEVICE_ATTR(size, S_IRUGO, size_show, size_store);
static ssize_t uuid_show(struct device *dev,
struct device_attribute *attr, char *buf)
static u8 *namespace_to_uuid(struct device *dev)
{
u8 *uuid;
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
uuid = nspm->uuid;
return nspm->uuid;
} else if (is_namespace_blk(dev)) {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
uuid = nsblk->uuid;
return nsblk->uuid;
} else
return -ENXIO;
return ERR_PTR(-ENXIO);
}
static ssize_t uuid_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
u8 *uuid = namespace_to_uuid(dev);
if (IS_ERR(uuid))
return PTR_ERR(uuid);
if (uuid)
return sprintf(buf, "%pUb\n", uuid);
return sprintf(buf, "\n");
......@@ -1089,7 +1197,7 @@ static int namespace_update_uuid(struct nd_region *nd_region,
*
* FIXME: can we delete uuid with zero dpa allocated?
*/
if (nd_mapping->labels)
if (list_empty(&nd_mapping->labels))
return -EBUSY;
}
......@@ -1491,14 +1599,19 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nd_namespace_label *nd_label;
struct nd_label_ent *label_ent;
bool found_uuid = false;
int l;
for_each_label(l, nd_label, nd_mapping->labels) {
u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
u16 position = __le16_to_cpu(nd_label->position);
u16 nlabel = __le16_to_cpu(nd_label->nlabel);
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
struct nd_namespace_label *nd_label = label_ent->label;
u16 position, nlabel;
u64 isetcookie;
if (!nd_label)
continue;
isetcookie = __le64_to_cpu(nd_label->isetcookie);
position = __le16_to_cpu(nd_label->position);
nlabel = __le16_to_cpu(nd_label->nlabel);
if (isetcookie != cookie)
continue;
......@@ -1528,7 +1641,6 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
{
struct nd_namespace_label *select = NULL;
int i;
if (!pmem_id)
......@@ -1536,90 +1648,106 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nd_namespace_label *nd_label;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_label *nd_label = NULL;
u64 hw_start, hw_end, pmem_start, pmem_end;
int l;
struct nd_label_ent *label_ent;
for_each_label(l, nd_label, nd_mapping->labels)
WARN_ON(!mutex_is_locked(&nd_mapping->lock));
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
nd_label = label_ent->label;
if (!nd_label)
continue;
if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0)
break;
nd_label = NULL;
}
if (!nd_label) {
WARN_ON(1);
return -EINVAL;
}
select = nd_label;
/*
* Check that this label is compliant with the dpa
* range published in NFIT
*/
hw_start = nd_mapping->start;
hw_end = hw_start + nd_mapping->size;
pmem_start = __le64_to_cpu(select->dpa);
pmem_end = pmem_start + __le64_to_cpu(select->rawsize);
if (pmem_start == hw_start && pmem_end <= hw_end)
pmem_start = __le64_to_cpu(nd_label->dpa);
pmem_end = pmem_start + __le64_to_cpu(nd_label->rawsize);
if (pmem_start >= hw_start && pmem_start < hw_end
&& pmem_end <= hw_end && pmem_end > hw_start)
/* pass */;
else
else {
dev_dbg(&nd_region->dev, "%s invalid label for %pUb\n",
dev_name(ndd->dev), nd_label->uuid);
return -EINVAL;
}
nd_mapping->labels[0] = select;
nd_mapping->labels[1] = NULL;
/* move recently validated label to the front of the list */
list_move(&label_ent->list, &nd_mapping->labels);
}
return 0;
}
/**
* find_pmem_label_set - validate interleave set labelling, retrieve label0
* create_namespace_pmem - validate interleave set labelling, retrieve label0
* @nd_region: region with mappings to validate
* @nspm: target namespace to create
* @nd_label: target pmem namespace label to evaluate
*/
static int find_pmem_label_set(struct nd_region *nd_region,
struct nd_namespace_pmem *nspm)
struct device *create_namespace_pmem(struct nd_region *nd_region,
struct nd_namespace_label *nd_label)
{
u64 cookie = nd_region_interleave_set_cookie(nd_region);
struct nd_namespace_label *nd_label;
u8 select_id[NSLABEL_UUID_LEN];
struct nd_label_ent *label_ent;
struct nd_namespace_pmem *nspm;
struct nd_mapping *nd_mapping;
resource_size_t size = 0;
u8 *pmem_id = NULL;
int rc = -ENODEV, l;
struct resource *res;
struct device *dev;
int rc = 0;
u16 i;
if (cookie == 0)
return -ENXIO;
if (cookie == 0) {
dev_dbg(&nd_region->dev, "invalid interleave-set-cookie\n");
return ERR_PTR(-ENXIO);
}
/*
* Find a complete set of labels by uuid. By definition we can start
* with any mapping as the reference label
*/
for_each_label(l, nd_label, nd_region->mapping[0].labels) {
u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
if (__le64_to_cpu(nd_label->isetcookie) != cookie) {
dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n",
nd_label->uuid);
return ERR_PTR(-EAGAIN);
}
if (isetcookie != cookie)
continue;
nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
if (!nspm)
return ERR_PTR(-ENOMEM);
nspm->id = -1;
dev = &nspm->nsio.common.dev;
dev->type = &namespace_pmem_device_type;
dev->parent = &nd_region->dev;
res = &nspm->nsio.res;
res->name = dev_name(&nd_region->dev);
res->flags = IORESOURCE_MEM;
for (i = 0; nd_region->ndr_mappings; i++)
if (!has_uuid_at_pos(nd_region, nd_label->uuid,
cookie, i))
for (i = 0; i < nd_region->ndr_mappings; i++)
if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
break;
if (i < nd_region->ndr_mappings) {
struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
/*
* Give up if we don't find an instance of a
* uuid at each position (from 0 to
* nd_region->ndr_mappings - 1), or if we find a
* dimm with two instances of the same uuid.
* Give up if we don't find an instance of a uuid at each
* position (from 0 to nd_region->ndr_mappings - 1), or if we
* find a dimm with two instances of the same uuid.
*/
dev_err(&nd_region->dev, "%s missing label for %pUb\n",
dev_name(ndd->dev), nd_label->uuid);
rc = -EINVAL;
goto err;
} else if (pmem_id) {
/*
* If there is more than one valid uuid set, we
* need userspace to clean this up.
*/
rc = -EBUSY;
goto err;
}
memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN);
pmem_id = select_id;
}
/*
......@@ -1630,14 +1758,23 @@ static int find_pmem_label_set(struct nd_region *nd_region,
* the dimm being enabled (i.e. nd_label_reserve_dpa()
* succeeded).
*/
rc = select_pmem_id(nd_region, pmem_id);
rc = select_pmem_id(nd_region, nd_label->uuid);
if (rc)
goto err;
/* Calculate total size and populate namespace properties from label0 */
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nd_namespace_label *label0 = nd_mapping->labels[0];
struct nd_namespace_label *label0;
nd_mapping = &nd_region->mapping[i];
label_ent = list_first_entry_or_null(&nd_mapping->labels,
typeof(*label_ent), list);
label0 = label_ent ? label_ent->label : 0;
if (!label0) {
WARN_ON(1);
continue;
}
size += __le64_to_cpu(label0->rawsize);
if (__le16_to_cpu(label0->position) != 0)
......@@ -1654,10 +1791,11 @@ static int find_pmem_label_set(struct nd_region *nd_region,
goto err;
}
nd_namespace_pmem_set_size(nd_region, nspm, size);
nd_namespace_pmem_set_resource(nd_region, nspm, size);
return 0;
return dev;
err:
namespace_pmem_release(dev);
switch (rc) {
case -EINVAL:
dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
......@@ -1670,55 +1808,7 @@ static int find_pmem_label_set(struct nd_region *nd_region,
__func__, rc);
break;
}
return rc;
}
static struct device **create_namespace_pmem(struct nd_region *nd_region)
{
struct nd_namespace_pmem *nspm;
struct device *dev, **devs;
struct resource *res;
int rc;
nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
if (!nspm)
return NULL;
dev = &nspm->nsio.common.dev;
dev->type = &namespace_pmem_device_type;
dev->parent = &nd_region->dev;
res = &nspm->nsio.res;
res->name = dev_name(&nd_region->dev);
res->flags = IORESOURCE_MEM;
rc = find_pmem_label_set(nd_region, nspm);
if (rc == -ENODEV) {
int i;
/* Pass, try to permit namespace creation... */
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
kfree(nd_mapping->labels);
nd_mapping->labels = NULL;
}
/* Publish a zero-sized namespace for userspace to configure. */
nd_namespace_pmem_set_size(nd_region, nspm, 0);
rc = 0;
} else if (rc)
goto err;
devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL);
if (!devs)
goto err;
devs[0] = dev;
return devs;
err:
namespace_pmem_release(&nspm->nsio.common.dev);
return NULL;
return ERR_PTR(rc);
}
struct resource *nsblk_add_resource(struct nd_region *nd_region,
......@@ -1770,16 +1860,58 @@ static struct device *nd_namespace_blk_create(struct nd_region *nd_region)
return &nsblk->common.dev;
}
void nd_region_create_blk_seed(struct nd_region *nd_region)
/*
 * nd_namespace_pmem_create - allocate an empty pmem namespace device
 * @nd_region: parent region; must satisfy is_nd_pmem()
 *
 * Allocates a zeroed nd_namespace_pmem, points its device at @nd_region,
 * reserves an id from the region's ns_ida, names the device
 * "namespace<region>.<id>" and sets a zero-sized resource.
 *
 * Returns the embedded struct device, or NULL when the region is not a
 * pmem region, the allocation fails, or no id could be reserved.  The
 * device is not registered here.
 */
static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
{
	struct device *parent = &nd_region->dev;
	struct nd_namespace_pmem *nspm;
	struct device *dev;

	if (!is_nd_pmem(parent))
		return NULL;

	nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
	if (!nspm)
		return NULL;

	dev = &nspm->nsio.common.dev;
	dev->type = &namespace_pmem_device_type;
	dev->parent = parent;

	/* the namespace resource is named after the parent region */
	nspm->nsio.res.name = dev_name(parent);
	nspm->nsio.res.flags = IORESOURCE_MEM;

	nspm->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL);
	if (nspm->id < 0) {
		kfree(nspm);
		return NULL;
	}

	dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id);
	dev->groups = nd_namespace_attribute_groups;
	nd_namespace_pmem_set_resource(nd_region, nspm, 0);

	return dev;
}
void nd_region_create_ns_seed(struct nd_region *nd_region)
{
WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_IO)
return;
if (is_nd_blk(&nd_region->dev))
nd_region->ns_seed = nd_namespace_blk_create(nd_region);
else
nd_region->ns_seed = nd_namespace_pmem_create(nd_region);
/*
* Seed creation failures are not fatal, provisioning is simply
* disabled until memory becomes available
*/
if (!nd_region->ns_seed)
dev_err(&nd_region->dev, "failed to create blk namespace\n");
dev_err(&nd_region->dev, "failed to create %s namespace\n",
is_nd_blk(&nd_region->dev) ? "blk" : "pmem");
else
nd_device_register(nd_region->ns_seed);
}
......@@ -1820,66 +1952,67 @@ void nd_region_create_btt_seed(struct nd_region *nd_region)
dev_err(&nd_region->dev, "failed to create btt namespace\n");
}
static struct device **create_namespace_blk(struct nd_region *nd_region)
static int add_namespace_resource(struct nd_region *nd_region,
struct nd_namespace_label *nd_label, struct device **devs,
int count)
{
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nd_namespace_label *nd_label;
struct device *dev, **devs = NULL;
struct nd_namespace_blk *nsblk;
struct nvdimm_drvdata *ndd;
int i, l, count = 0;
struct resource *res;
if (nd_region->ndr_mappings == 0)
return NULL;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
int i;
ndd = to_ndd(nd_mapping);
for_each_label(l, nd_label, nd_mapping->labels) {
u32 flags = __le32_to_cpu(nd_label->flags);
char *name[NSLABEL_NAME_LEN];
struct device **__devs;
for (i = 0; i < count; i++) {
u8 *uuid = namespace_to_uuid(devs[i]);
struct resource *res;
if (flags & NSLABEL_FLAG_LOCAL)
/* pass */;
else
if (IS_ERR_OR_NULL(uuid)) {
WARN_ON(1);
continue;
}
for (i = 0; i < count; i++) {
nsblk = to_nd_namespace_blk(devs[i]);
if (memcmp(nsblk->uuid, nd_label->uuid,
NSLABEL_UUID_LEN) == 0) {
res = nsblk_add_resource(nd_region, ndd, nsblk,
if (memcmp(uuid, nd_label->uuid, NSLABEL_UUID_LEN) != 0)
continue;
if (is_namespace_blk(devs[i])) {
res = nsblk_add_resource(nd_region, ndd,
to_nd_namespace_blk(devs[i]),
__le64_to_cpu(nd_label->dpa));
if (!res)
goto err;
nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
dev_name(&nsblk->common.dev));
break;
return -ENXIO;
nd_dbg_dpa(nd_region, ndd, res, "%d assign\n", count);
} else {
dev_err(&nd_region->dev,
"error: conflicting extents for uuid: %pUb\n",
nd_label->uuid);
return -ENXIO;
}
break;
}
if (i < count)
continue;
__devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL);
if (!__devs)
goto err;
memcpy(__devs, devs, sizeof(dev) * count);
kfree(devs);
devs = __devs;
return i;
}
struct device *create_namespace_blk(struct nd_region *nd_region,
struct nd_namespace_label *nd_label, int count)
{
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_blk *nsblk;
char *name[NSLABEL_NAME_LEN];
struct device *dev = NULL;
struct resource *res;
nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
if (!nsblk)
goto err;
return ERR_PTR(-ENOMEM);
dev = &nsblk->common.dev;
dev->type = &namespace_blk_device_type;
dev->parent = &nd_region->dev;
dev_set_name(dev, "namespace%d.%d", nd_region->id, count);
devs[count++] = dev;
nsblk->id = -1;
nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
GFP_KERNEL);
if (!nsblk->uuid)
goto err;
goto blk_err;
memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
if (name[0])
nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
......@@ -1887,46 +2020,198 @@ static struct device **create_namespace_blk(struct nd_region *nd_region)
res = nsblk_add_resource(nd_region, ndd, nsblk,
__le64_to_cpu(nd_label->dpa));
if (!res)
goto blk_err;
nd_dbg_dpa(nd_region, ndd, res, "%d: assign\n", count);
return dev;
blk_err:
namespace_blk_release(dev);
return ERR_PTR(-ENXIO);
}
static int cmp_dpa(const void *a, const void *b)
{
const struct device *dev_a = *(const struct device **) a;
const struct device *dev_b = *(const struct device **) b;
struct nd_namespace_blk *nsblk_a, *nsblk_b;
struct nd_namespace_pmem *nspm_a, *nspm_b;
if (is_namespace_io(dev_a))
return 0;
if (is_namespace_blk(dev_a)) {
nsblk_a = to_nd_namespace_blk(dev_a);
nsblk_b = to_nd_namespace_blk(dev_b);
return memcmp(&nsblk_a->res[0]->start, &nsblk_b->res[0]->start,
sizeof(resource_size_t));
}
nspm_a = to_nd_namespace_pmem(dev_a);
nspm_b = to_nd_namespace_pmem(dev_b);
return memcmp(&nspm_a->nsio.res.start, &nspm_b->nsio.res.start,
sizeof(resource_size_t));
}
static struct device **scan_labels(struct nd_region *nd_region)
{
int i, count = 0;
struct device *dev, **devs = NULL;
struct nd_label_ent *label_ent, *e;
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
resource_size_t map_end = nd_mapping->start + nd_mapping->size - 1;
/* "safe" because create_namespace_pmem() might list_move() label_ent */
list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
struct nd_namespace_label *nd_label = label_ent->label;
struct device **__devs;
u32 flags;
if (!nd_label)
continue;
flags = __le32_to_cpu(nd_label->flags);
if (is_nd_blk(&nd_region->dev)
== !!(flags & NSLABEL_FLAG_LOCAL))
/* pass, region matches label type */;
else
continue;
/* skip labels that describe extents outside of the region */
if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end)
continue;
i = add_namespace_resource(nd_region, nd_label, devs, count);
if (i < 0)
goto err;
if (i < count)
continue;
__devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL);
if (!__devs)
goto err;
memcpy(__devs, devs, sizeof(dev) * count);
kfree(devs);
devs = __devs;
if (is_nd_blk(&nd_region->dev)) {
dev = create_namespace_blk(nd_region, nd_label, count);
if (IS_ERR(dev))
goto err;
nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
dev_name(&nsblk->common.dev));
devs[count++] = dev;
} else {
dev = create_namespace_pmem(nd_region, nd_label);
if (IS_ERR(dev)) {
switch (PTR_ERR(dev)) {
case -EAGAIN:
/* skip invalid labels */
continue;
case -ENODEV:
/* fallthrough to seed creation */
break;
default:
goto err;
}
} else
devs[count++] = dev;
}
}
dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n",
__func__, count, count == 1 ? "" : "s");
dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
__func__, count, is_nd_blk(&nd_region->dev)
? "blk" : "pmem", count == 1 ? "" : "s");
if (count == 0) {
/* Publish a zero-sized namespace for userspace to configure. */
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
kfree(nd_mapping->labels);
nd_mapping->labels = NULL;
}
nd_mapping_free_labels(nd_mapping);
devs = kcalloc(2, sizeof(dev), GFP_KERNEL);
if (!devs)
goto err;
if (is_nd_blk(&nd_region->dev)) {
struct nd_namespace_blk *nsblk;
nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
if (!nsblk)
goto err;
dev = &nsblk->common.dev;
dev->type = &namespace_blk_device_type;
} else {
struct nd_namespace_pmem *nspm;
nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
if (!nspm)
goto err;
dev = &nspm->nsio.common.dev;
dev->type = &namespace_pmem_device_type;
nd_namespace_pmem_set_resource(nd_region, nspm, 0);
}
dev->parent = &nd_region->dev;
devs[count++] = dev;
} else if (is_nd_pmem(&nd_region->dev)) {
/* clean unselected labels */
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct list_head *l, *e;
LIST_HEAD(list);
int j;
nd_mapping = &nd_region->mapping[i];
if (list_empty(&nd_mapping->labels)) {
WARN_ON(1);
continue;
}
j = count;
list_for_each_safe(l, e, &nd_mapping->labels) {
if (!j--)
break;
list_move_tail(l, &list);
}
nd_mapping_free_labels(nd_mapping);
list_splice_init(&list, &nd_mapping->labels);
}
}
if (count > 1)
sort(devs, count, sizeof(struct device *), cmp_dpa, NULL);
return devs;
err:
for (i = 0; i < count; i++) {
nsblk = to_nd_namespace_blk(devs[i]);
namespace_blk_release(&nsblk->common.dev);
}
err:
for (i = 0; devs[i]; i++)
if (is_nd_blk(&nd_region->dev))
namespace_blk_release(devs[i]);
else
namespace_pmem_release(devs[i]);
kfree(devs);
return NULL;
}
/*
 * create_namespaces - scan the region's labels with every mapping locked
 * @nd_region: region whose labels are scanned
 *
 * Takes each mapping's lock in ascending index order (passing the index
 * as the mutex_lock_nested() subclass), runs scan_labels(), then drops
 * the locks in descending index order.
 *
 * Returns whatever scan_labels() returned, or NULL when the region has
 * no mappings.
 */
static struct device **create_namespaces(struct nd_region *nd_region)
{
	struct device **found;
	int idx;

	if (nd_region->ndr_mappings == 0)
		return NULL;

	/* lock down all mappings while we scan labels */
	for (idx = 0; idx < nd_region->ndr_mappings; idx++)
		mutex_lock_nested(&nd_region->mapping[idx].lock, idx);

	found = scan_labels(nd_region);

	/* release in the reverse of the acquisition order */
	for (idx = nd_region->ndr_mappings - 1; idx >= 0; idx--)
		mutex_unlock(&nd_region->mapping[idx].lock);

	return found;
}
static int init_active_labels(struct nd_region *nd_region)
{
int i;
......@@ -1935,6 +2220,7 @@ static int init_active_labels(struct nd_region *nd_region)
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nvdimm *nvdimm = nd_mapping->nvdimm;
struct nd_label_ent *label_ent;
int count, j;
/*
......@@ -1956,16 +2242,27 @@ static int init_active_labels(struct nd_region *nd_region)
dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
if (!count)
continue;
nd_mapping->labels = kcalloc(count + 1, sizeof(void *),
GFP_KERNEL);
if (!nd_mapping->labels)
return -ENOMEM;
for (j = 0; j < count; j++) {
struct nd_namespace_label *label;
label_ent = kzalloc(sizeof(*label_ent), GFP_KERNEL);
if (!label_ent)
break;
label = nd_label_active(ndd, j);
nd_mapping->labels[j] = label;
label_ent->label = label;
mutex_lock(&nd_mapping->lock);
list_add_tail(&label_ent->list, &nd_mapping->labels);
mutex_unlock(&nd_mapping->lock);
}
if (j >= count)
continue;
mutex_lock(&nd_mapping->lock);
nd_mapping_free_labels(nd_mapping);
mutex_unlock(&nd_mapping->lock);
return -ENOMEM;
}
return 0;
......@@ -1990,10 +2287,8 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
devs = create_namespace_io(nd_region);
break;
case ND_DEVICE_NAMESPACE_PMEM:
devs = create_namespace_pmem(nd_region);
break;
case ND_DEVICE_NAMESPACE_BLK:
devs = create_namespace_blk(nd_region);
devs = create_namespaces(nd_region);
break;
default:
break;
......@@ -2014,6 +2309,13 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
id = ida_simple_get(&nd_region->ns_ida, 0, 0,
GFP_KERNEL);
nsblk->id = id;
} else if (type == ND_DEVICE_NAMESPACE_PMEM) {
struct nd_namespace_pmem *nspm;
nspm = to_nd_namespace_pmem(dev);
id = ida_simple_get(&nd_region->ns_ida, 0, 0,
GFP_KERNEL);
nspm->id = id;
} else
id = i;
......
......@@ -44,6 +44,23 @@ struct nvdimm {
struct resource *flush_wpq;
};
/**
* struct blk_alloc_info - tracking info for BLK dpa scanning
* @nd_mapping: blk region mapping boundaries
* @available: decremented in alias_dpa_busy as aliased PMEM is scanned
* @busy: decremented in blk_dpa_busy to account for ranges already
* handled by alias_dpa_busy
* @res: alias_dpa_busy interprets this as a free space range that needs
* to be truncated to the valid BLK allocation starting DPA, while
* blk_dpa_busy treats it as a busy range that needs the aliased PMEM
* ranges truncated.
*/
struct blk_alloc_info {
struct nd_mapping *nd_mapping;
resource_size_t available, busy;
struct resource *res;
};
bool is_nvdimm(struct device *dev);
bool is_nd_pmem(struct device *dev);
bool is_nd_blk(struct device *dev);
......@@ -54,7 +71,7 @@ void nvdimm_devs_exit(void);
void nd_region_devs_exit(void);
void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
struct nd_region;
void nd_region_create_blk_seed(struct nd_region *nd_region);
void nd_region_create_ns_seed(struct nd_region *nd_region);
void nd_region_create_btt_seed(struct nd_region *nd_region);
void nd_region_create_pfn_seed(struct nd_region *nd_region);
void nd_region_create_dax_seed(struct nd_region *nd_region);
......@@ -73,13 +90,14 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
struct nd_region;
struct nvdimm_drvdata;
struct nd_mapping;
void nd_mapping_free_labels(struct nd_mapping *nd_mapping);
resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, resource_size_t *overlap);
resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping);
resource_size_t nd_blk_available_dpa(struct nd_region *nd_region);
resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
struct nd_label_id *label_id);
struct nd_mapping;
int alias_dpa_busy(struct device *dev, void *data);
struct resource *nsblk_add_resource(struct nd_region *nd_region,
struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
resource_size_t start);
......
......@@ -101,9 +101,6 @@ static inline struct nd_namespace_index *to_next_namespace_index(
(unsigned long long) (res ? resource_size(res) : 0), \
(unsigned long long) (res ? res->start : 0), ##arg)
#define for_each_label(l, label, labels) \
for (l = 0; (label = labels ? labels[l] : NULL); l++)
#define for_each_dpa_resource(ndd, res) \
for (res = (ndd)->dpa.child; res; res = res->sibling)
......@@ -116,6 +113,31 @@ struct nd_percpu_lane {
spinlock_t lock;
};
/*
 * struct nd_label_ent - one entry on a mapping's label list
 * @list: links the entry onto nd_mapping->labels
 * @label: the namespace label this entry refers to; users of the list
 *	check it for NULL before dereferencing
 */
struct nd_label_ent {
struct list_head list;
struct nd_namespace_label *label;
};
/*
 * NOTE(review): presumably lock subclasses for nd_mapping->lock (e.g. for
 * mutex_lock_nested()); no user is visible in this part of the change, so
 * confirm against the callers.
 */
enum nd_mapping_lock_class {
ND_MAPPING_CLASS0,
ND_MAPPING_UUID_SCAN,
};
/*
 * struct nd_mapping - a region's view of one nvdimm
 * @nvdimm: the dimm this mapping refers to
 * @start: first address of the mapping; label dpa values are validated
 *	against the [start, start + size) range
 * @size: length of the mapping in bytes
 * @labels: list of struct nd_label_ent, one per tracked label
 * @lock: held while @labels is walked or modified
 */
struct nd_mapping {
struct nvdimm *nvdimm;
u64 start;
u64 size;
struct list_head labels;
struct mutex lock;
/*
* @ndd is for private use at region enable / disable time for
* get_ndd() + put_ndd(), all other nd_mapping to ndd
* conversions use to_ndd() which respects enabled state of the
* nvdimm.
*/
struct nvdimm_drvdata *ndd;
};
struct nd_region {
struct device dev;
struct ida ns_ida;
......@@ -209,6 +231,7 @@ void nvdimm_exit(void);
void nd_region_exit(void);
struct nvdimm;
struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
int nvdimm_check_config_data(struct device *dev);
int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
......
......@@ -66,13 +66,32 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
invalidate_pmem(pmem->virt_addr + offset, len);
}
/*
 * write_pmem - copy part of a page into persistent memory
 * @pmem_addr: destination address handed to memcpy_to_pmem()
 * @page: source page, mapped with kmap_atomic() for the copy
 * @off: byte offset into @page where the source data starts
 * @len: number of bytes to copy
 *
 * The atomic mapping only lives for the duration of the copy.
 */
static void write_pmem(void *pmem_addr, struct page *page,
		unsigned int off, unsigned int len)
{
	void *kaddr;

	kaddr = kmap_atomic(page);
	memcpy_to_pmem(pmem_addr, kaddr + off, len);
	kunmap_atomic(kaddr);
}
/*
 * read_pmem - copy from persistent memory into part of a page
 * @page: destination page, mapped with kmap_atomic() for the copy
 * @off: byte offset into @page where the data is placed
 * @pmem_addr: source address handed to memcpy_from_pmem()
 * @len: number of bytes to copy
 *
 * Returns the result of memcpy_from_pmem().
 */
static int read_pmem(struct page *page, unsigned int off,
		void *pmem_addr, unsigned int len)
{
	void *kaddr;
	int status;

	kaddr = kmap_atomic(page);
	status = memcpy_from_pmem(kaddr + off, pmem_addr, len);
	kunmap_atomic(kaddr);

	return status;
}
static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
unsigned int len, unsigned int off, bool is_write,
sector_t sector)
{
int rc = 0;
bool bad_pmem = false;
void *mem = kmap_atomic(page);
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void *pmem_addr = pmem->virt_addr + pmem_off;
......@@ -83,7 +102,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
if (unlikely(bad_pmem))
rc = -EIO;
else {
rc = memcpy_from_pmem(mem + off, pmem_addr, len);
rc = read_pmem(page, off, pmem_addr, len);
flush_dcache_page(page);
}
} else {
......@@ -102,14 +121,13 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
* after clear poison.
*/
flush_dcache_page(page);
memcpy_to_pmem(pmem_addr, mem + off, len);
write_pmem(pmem_addr, page, off, len);
if (unlikely(bad_pmem)) {
pmem_clear_poison(pmem, pmem_off, len);
memcpy_to_pmem(pmem_addr, mem + off, len);
write_pmem(pmem_addr, page, off, len);
}
}
kunmap_atomic(mem);
return rc;
}
......
......@@ -70,7 +70,7 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
int nd_region_activate(struct nd_region *nd_region)
{
int i, num_flush = 0;
int i, j, num_flush = 0;
struct nd_region_data *ndrd;
struct device *dev = &nd_region->dev;
size_t flush_data_size = sizeof(void *);
......@@ -107,6 +107,21 @@ int nd_region_activate(struct nd_region *nd_region)
return rc;
}
/*
* Clear out entries that are duplicates. This should prevent the
* extra flushings.
*/
for (i = 0; i < nd_region->ndr_mappings - 1; i++) {
/* ignore if NULL already */
if (!ndrd_get_flush_wpq(ndrd, i, 0))
continue;
for (j = i + 1; j < nd_region->ndr_mappings; j++)
if (ndrd_get_flush_wpq(ndrd, i, 0) ==
ndrd_get_flush_wpq(ndrd, j, 0))
ndrd_set_flush_wpq(ndrd, j, 0, NULL);
}
return 0;
}
......@@ -298,9 +313,8 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
blk_max_overlap = overlap;
goto retry;
}
} else if (is_nd_blk(&nd_region->dev)) {
available += nd_blk_available_dpa(nd_mapping);
}
} else if (is_nd_blk(&nd_region->dev))
available += nd_blk_available_dpa(nd_region);
}
return available;
......@@ -491,6 +505,17 @@ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
return 0;
}
void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
{
struct nd_label_ent *label_ent, *e;
WARN_ON(!mutex_is_locked(&nd_mapping->lock));
list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
list_del(&label_ent->list);
kfree(label_ent);
}
}
/*
* Upon successful probe/remove, take/release a reference on the
* associated interleave set (if present), and plant new btt + namespace
......@@ -511,8 +536,10 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
struct nvdimm_drvdata *ndd = nd_mapping->ndd;
struct nvdimm *nvdimm = nd_mapping->nvdimm;
kfree(nd_mapping->labels);
nd_mapping->labels = NULL;
mutex_lock(&nd_mapping->lock);
nd_mapping_free_labels(nd_mapping);
mutex_unlock(&nd_mapping->lock);
put_ndd(ndd);
nd_mapping->ndd = NULL;
if (ndd)
......@@ -522,11 +549,12 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
if (is_nd_pmem(dev))
return;
}
if (dev->parent && is_nd_blk(dev->parent) && probe) {
if (dev->parent && (is_nd_blk(dev->parent) || is_nd_pmem(dev->parent))
&& probe) {
nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev);
if (nd_region->ns_seed == dev)
nd_region_create_blk_seed(nd_region);
nd_region_create_ns_seed(nd_region);
nvdimm_bus_unlock(dev);
}
if (is_nd_btt(dev) && probe) {
......@@ -536,23 +564,30 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
nvdimm_bus_lock(dev);
if (nd_region->btt_seed == dev)
nd_region_create_btt_seed(nd_region);
if (nd_region->ns_seed == &nd_btt->ndns->dev &&
is_nd_blk(dev->parent))
nd_region_create_blk_seed(nd_region);
if (nd_region->ns_seed == &nd_btt->ndns->dev)
nd_region_create_ns_seed(nd_region);
nvdimm_bus_unlock(dev);
}
if (is_nd_pfn(dev) && probe) {
struct nd_pfn *nd_pfn = to_nd_pfn(dev);
nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev);
if (nd_region->pfn_seed == dev)
nd_region_create_pfn_seed(nd_region);
if (nd_region->ns_seed == &nd_pfn->ndns->dev)
nd_region_create_ns_seed(nd_region);
nvdimm_bus_unlock(dev);
}
if (is_nd_dax(dev) && probe) {
struct nd_dax *nd_dax = to_nd_dax(dev);
nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev);
if (nd_region->dax_seed == dev)
nd_region_create_dax_seed(nd_region);
if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
nd_region_create_ns_seed(nd_region);
nvdimm_bus_unlock(dev);
}
}
......@@ -759,10 +794,10 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
int ro = 0;
for (i = 0; i < ndr_desc->num_mappings; i++) {
struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
struct nvdimm *nvdimm = mapping->nvdimm;
if ((nd_mapping->start | nd_mapping->size) % SZ_4K) {
if ((mapping->start | mapping->size) % SZ_4K) {
dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n",
caller, dev_name(&nvdimm->dev), i);
......@@ -813,11 +848,15 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
ndl->count = 0;
}
memcpy(nd_region->mapping, ndr_desc->nd_mapping,
sizeof(struct nd_mapping) * ndr_desc->num_mappings);
for (i = 0; i < ndr_desc->num_mappings; i++) {
struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
struct nvdimm *nvdimm = mapping->nvdimm;
nd_region->mapping[i].nvdimm = nvdimm;
nd_region->mapping[i].start = mapping->start;
nd_region->mapping[i].size = mapping->size;
INIT_LIST_HEAD(&nd_region->mapping[i].labels);
mutex_init(&nd_region->mapping[i].lock);
get_device(&nvdimm->dev);
}
......
......@@ -406,6 +406,7 @@ void cd_forget(struct inode *inode)
spin_lock(&cdev_lock);
list_del_init(&inode->i_devices);
inode->i_cdev = NULL;
inode->i_mapping = &inode->i_data;
spin_unlock(&cdev_lock);
}
......
......@@ -50,23 +50,6 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc);
struct nd_namespace_label;
struct nvdimm_drvdata;
struct nd_mapping {
struct nvdimm *nvdimm;
struct nd_namespace_label **labels;
u64 start;
u64 size;
/*
* @ndd is for private use at region enable / disable time for
* get_ndd() + put_ndd(), all other nd_mapping to ndd
* conversions use to_ndd() which respects enabled state of the
* nvdimm.
*/
struct nvdimm_drvdata *ndd;
};
struct nvdimm_bus_descriptor {
const struct attribute_group **attr_groups;
unsigned long cmd_mask;
......@@ -89,9 +72,15 @@ struct nd_interleave_set {
u64 cookie;
};
/*
 * Description of one dimm mapping, referenced from struct nd_region_desc
 * through its ->mapping array (num_mappings entries).
 */
struct nd_mapping_desc {
/* dimm that backs this mapping */
struct nvdimm *nvdimm;
/* NOTE(review): presumably a dimm-relative start offset - confirm */
u64 start;
/* length of the mapping */
u64 size;
};
struct nd_region_desc {
struct resource *res;
struct nd_mapping *nd_mapping;
struct nd_mapping_desc *mapping;
u16 num_mappings;
const struct attribute_group **attr_groups;
struct nd_interleave_set *nd_set;
......@@ -129,6 +118,8 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
phys_addr_t start, unsigned int len);
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nfit_desc);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
......@@ -139,6 +130,7 @@ struct nd_blk_region *to_nd_blk_region(struct device *dev);
struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
const char *nvdimm_name(struct nvdimm *nvdimm);
struct kobject *nvdimm_kobj(struct nvdimm *nvdimm);
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
void *nvdimm_provider_data(struct nvdimm *nvdimm);
struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
......
......@@ -77,11 +77,13 @@ struct nd_namespace_io {
* @nsio: device and system physical address range to drive
* @alt_name: namespace name supplied in the dimm label
* @uuid: namespace name supplied in the dimm label
* @id: ida allocated id
*/
struct nd_namespace_pmem {
struct nd_namespace_io nsio;
char *alt_name;
u8 *uuid;
int id;
};
/**
......@@ -105,19 +107,19 @@ struct nd_namespace_blk {
struct resource **res;
};
static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
static inline struct nd_namespace_io *to_nd_namespace_io(const struct device *dev)
{
return container_of(dev, struct nd_namespace_io, common.dev);
}
static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
static inline struct nd_namespace_pmem *to_nd_namespace_pmem(const struct device *dev)
{
struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
return container_of(nsio, struct nd_namespace_pmem, nsio);
}
static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev)
static inline struct nd_namespace_blk *to_nd_namespace_blk(const struct device *dev)
{
return container_of(dev, struct nd_namespace_blk, common.dev);
}
......
......@@ -65,6 +65,7 @@
#define V9FS_MAGIC 0x01021997
#define BDEVFS_MAGIC 0x62646576
#define DAXFS_MAGIC 0x64646178
#define BINFMTFS_MAGIC 0x42494e4d
#define DEVPTS_SUPER_MAGIC 0x1cd1
#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA
......
......@@ -21,14 +21,16 @@ struct nd_cmd_smart {
} __packed;
#define ND_SMART_HEALTH_VALID (1 << 0)
#define ND_SMART_TEMP_VALID (1 << 1)
#define ND_SMART_SPARES_VALID (1 << 2)
#define ND_SMART_ALARM_VALID (1 << 3)
#define ND_SMART_USED_VALID (1 << 4)
#define ND_SMART_SHUTDOWN_VALID (1 << 5)
#define ND_SMART_VENDOR_VALID (1 << 6)
#define ND_SMART_TEMP_TRIP (1 << 0)
#define ND_SMART_SPARE_TRIP (1 << 1)
#define ND_SMART_SPARES_VALID (1 << 1)
#define ND_SMART_USED_VALID (1 << 2)
#define ND_SMART_TEMP_VALID (1 << 3)
#define ND_SMART_CTEMP_VALID (1 << 4)
#define ND_SMART_ALARM_VALID (1 << 9)
#define ND_SMART_SHUTDOWN_VALID (1 << 10)
#define ND_SMART_VENDOR_VALID (1 << 11)
#define ND_SMART_SPARE_TRIP (1 << 0)
#define ND_SMART_TEMP_TRIP (1 << 1)
#define ND_SMART_CTEMP_TRIP (1 << 2)
#define ND_SMART_NON_CRITICAL_HEALTH (1 << 0)
#define ND_SMART_CRITICAL_HEALTH (1 << 1)
#define ND_SMART_FATAL_HEALTH (1 << 2)
......@@ -37,14 +39,15 @@ struct nd_smart_payload {
__u32 flags;
__u8 reserved0[4];
__u8 health;
__u16 temperature;
__u8 spares;
__u8 alarm_flags;
__u8 life_used;
__u8 alarm_flags;
__u16 temperature;
__u16 ctrl_temperature;
__u8 reserved1[15];
__u8 shutdown_state;
__u8 reserved1;
__u32 vendor_size;
__u8 vendor_data[108];
__u8 vendor_data[92];
} __packed;
struct nd_cmd_smart_threshold {
......@@ -53,7 +56,8 @@ struct nd_cmd_smart_threshold {
} __packed;
struct nd_smart_threshold_payload {
__u16 alarm_control;
__u8 alarm_control;
__u8 reserved0;
__u16 temperature;
__u8 spares;
__u8 reserved[3];
......
......@@ -13,6 +13,7 @@ ldflags-y += --wrap=__release_region
ldflags-y += --wrap=devm_memremap_pages
ldflags-y += --wrap=insert_resource
ldflags-y += --wrap=remove_resource
ldflags-y += --wrap=acpi_evaluate_object
DRIVERS := ../../../drivers
NVDIMM_SRC := $(DRIVERS)/nvdimm
......
......@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/pfn_t.h>
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/mm.h>
#include "nfit_test.h"
......@@ -73,7 +74,7 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
if (nfit_res)
return (void __iomem *) nfit_res->buf + offset
- nfit_res->res->start;
- nfit_res->res.start;
return fallback_fn(offset, size);
}
......@@ -84,7 +85,7 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
if (nfit_res)
return (void __iomem *) nfit_res->buf + offset
- nfit_res->res->start;
- nfit_res->res.start;
return devm_ioremap_nocache(dev, offset, size);
}
EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
......@@ -95,7 +96,7 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
return nfit_res->buf + offset - nfit_res->res->start;
return nfit_res->buf + offset - nfit_res->res.start;
return devm_memremap(dev, offset, size, flags);
}
EXPORT_SYMBOL(__wrap_devm_memremap);
......@@ -107,7 +108,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
return nfit_res->buf + offset - nfit_res->res->start;
return nfit_res->buf + offset - nfit_res->res.start;
return devm_memremap_pages(dev, res, ref, altmap);
}
EXPORT_SYMBOL(__wrap_devm_memremap_pages);
......@@ -128,7 +129,7 @@ void *__wrap_memremap(resource_size_t offset, size_t size,
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
return nfit_res->buf + offset - nfit_res->res->start;
return nfit_res->buf + offset - nfit_res->res.start;
return memremap(offset, size, flags);
}
EXPORT_SYMBOL(__wrap_memremap);
......@@ -174,6 +175,63 @@ void __wrap_memunmap(void *addr)
}
EXPORT_SYMBOL(__wrap_memunmap);
static bool nfit_test_release_region(struct device *dev,
struct resource *parent, resource_size_t start,
resource_size_t n);
static void nfit_devres_release(struct device *dev, void *data)
{
struct resource *res = *((struct resource **) data);
WARN_ON(!nfit_test_release_region(NULL, &iomem_resource, res->start,
resource_size(res)));
}
/*
 * devres match callback: @__res is the devres payload (a pointer to a
 * struct resource pointer) and @match_data points at the resource_size_t
 * start address being searched for.  Returns non-zero when the tracked
 * resource begins at that address.
 */
static int match(struct device *dev, void *__res, void *match_data)
{
	struct resource **slot = __res;
	resource_size_t *wanted = match_data;

	return (*slot)->start == *wanted;
}
/*
 * nfit_test_release_region - release a range claimed from a test resource
 * @dev: device owning a devres-tracked claim, or NULL for a direct release
 * @parent: parent resource; only &iomem_resource is handled here
 * @start: first address of the range being released
 * @n: size of the range being released
 *
 * When @dev is set the matching devres entry is released, which in turn
 * re-enters this function with a NULL device through
 * nfit_devres_release().  Otherwise the request starting at @start is
 * removed from the resource's request list and freed; a warning is
 * raised when no such request exists or its size differs from @n.
 *
 * Return: true when @start resolves to a test resource under
 * &iomem_resource (the release was handled here), false otherwise so the
 * caller can fall back to the real implementation.
 */
static bool nfit_test_release_region(struct device *dev,
		struct resource *parent, resource_size_t start,
		resource_size_t n)
{
	struct nfit_test_request *req, *found = NULL;
	struct nfit_test_resource *nfit_res;

	if (parent != &iomem_resource)
		return false;

	nfit_res = get_nfit_res(start);
	if (!nfit_res)
		return false;

	if (dev) {
		devres_release(dev, nfit_devres_release, match, &start);
		return true;
	}

	spin_lock(&nfit_res->lock);
	list_for_each_entry(req, &nfit_res->requests, list) {
		if (req->res.start == start) {
			found = req;
			list_del(&req->list);
			break;
		}
	}
	spin_unlock(&nfit_res->lock);

	/*
	 * resource_size_t may be narrower than unsigned long long, so cast
	 * explicitly to match the %llx conversions.
	 */
	WARN(!found || resource_size(&found->res) != n,
			"%s: start: %llx n: %llx mismatch: %pr\n",
			__func__, (unsigned long long) start,
			(unsigned long long) n,
			found ? &found->res : NULL);

	/* kfree(NULL) is a no-op, so no guard is needed */
	kfree(found);
	return true;
}
static struct resource *nfit_test_request_region(struct device *dev,
struct resource *parent, resource_size_t start,
resource_size_t n, const char *name, int flags)
......@@ -183,21 +241,57 @@ static struct resource *nfit_test_request_region(struct device *dev,
if (parent == &iomem_resource) {
nfit_res = get_nfit_res(start);
if (nfit_res) {
struct resource *res = nfit_res->res + 1;
struct nfit_test_request *req;
struct resource *res = NULL;
if (start + n > nfit_res->res->start
+ resource_size(nfit_res->res)) {
if (start + n > nfit_res->res.start
+ resource_size(&nfit_res->res)) {
pr_debug("%s: start: %llx n: %llx overflow: %pr\n",
__func__, start, n,
nfit_res->res);
&nfit_res->res);
return NULL;
}
spin_lock(&nfit_res->lock);
list_for_each_entry(req, &nfit_res->requests, list)
if (start == req->res.start) {
res = &req->res;
break;
}
spin_unlock(&nfit_res->lock);
if (res) {
WARN(1, "%pr already busy\n", res);
return NULL;
}
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
return NULL;
INIT_LIST_HEAD(&req->list);
res = &req->res;
res->start = start;
res->end = start + n - 1;
res->name = name;
res->flags = resource_type(parent);
res->flags |= IORESOURCE_BUSY | flags;
spin_lock(&nfit_res->lock);
list_add(&req->list, &nfit_res->requests);
spin_unlock(&nfit_res->lock);
if (dev) {
struct resource **d;
d = devres_alloc(nfit_devres_release,
sizeof(struct resource *),
GFP_KERNEL);
if (!d)
return NULL;
*d = res;
devres_add(dev, d);
}
pr_debug("%s: %pr\n", __func__, res);
return res;
}
......@@ -241,29 +335,10 @@ struct resource *__wrap___devm_request_region(struct device *dev,
}
EXPORT_SYMBOL(__wrap___devm_request_region);
static bool nfit_test_release_region(struct resource *parent,
resource_size_t start, resource_size_t n)
{
if (parent == &iomem_resource) {
struct nfit_test_resource *nfit_res = get_nfit_res(start);
if (nfit_res) {
struct resource *res = nfit_res->res + 1;
if (start != res->start || resource_size(res) != n)
pr_info("%s: start: %llx n: %llx mismatch: %pr\n",
__func__, start, n, res);
else
memset(res, 0, sizeof(*res));
return true;
}
}
return false;
}
void __wrap___release_region(struct resource *parent, resource_size_t start,
resource_size_t n)
{
if (!nfit_test_release_region(parent, start, n))
if (!nfit_test_release_region(NULL, parent, start, n))
__release_region(parent, start, n);
}
EXPORT_SYMBOL(__wrap___release_region);
......@@ -271,9 +346,25 @@ EXPORT_SYMBOL(__wrap___release_region);
void __wrap___devm_release_region(struct device *dev, struct resource *parent,
resource_size_t start, resource_size_t n)
{
if (!nfit_test_release_region(parent, start, n))
if (!nfit_test_release_region(dev, parent, start, n))
__devm_release_region(dev, parent, start, n);
}
EXPORT_SYMBOL(__wrap___devm_release_region);
/*
 * Link-time wrapper for acpi_evaluate_object().
 *
 * When @handle resolves to a test resource and @path is "_FIT", the
 * object pointer stored in that resource's buffer is handed back through
 * @buf without calling into ACPI.  Every other request, including one
 * with a NULL @path or NULL @buf, is forwarded unchanged to the real
 * acpi_evaluate_object().
 *
 * Return: AE_OK for the emulated "_FIT" evaluation, otherwise whatever
 * acpi_evaluate_object() returns.
 */
acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
		struct acpi_object_list *p, struct acpi_buffer *buf)
{
	struct nfit_test_resource *nfit_res = get_nfit_res((long) handle);
	union acpi_object **obj;

	/* check path before strcmp(): a NULL pathname is a legal argument */
	if (!nfit_res || !path || !buf || strcmp(path, "_FIT"))
		return acpi_evaluate_object(handle, path, p, buf);

	obj = nfit_res->buf;
	buf->length = sizeof(union acpi_object);
	buf->pointer = *obj;
	return AE_OK;
}
EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
MODULE_LICENSE("GPL v2");
......@@ -132,6 +132,8 @@ static u32 handle[NUM_DCR] = {
[4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
};
static unsigned long dimm_fail_cmd_flags[NUM_DCR];
struct nfit_test {
struct acpi_nfit_desc acpi_desc;
struct platform_device pdev;
......@@ -154,11 +156,14 @@ struct nfit_test {
int (*alloc)(struct nfit_test *t);
void (*setup)(struct nfit_test *t);
int setup_hotplug;
union acpi_object **_fit;
dma_addr_t _fit_dma;
struct ars_state {
struct nd_cmd_ars_status *ars_status;
unsigned long deadline;
spinlock_t lock;
} ars_state;
struct device *dimm_dev[NUM_DCR];
};
static struct nfit_test *to_nfit_test(struct device *dev)
......@@ -411,6 +416,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
if (i >= ARRAY_SIZE(handle))
return -ENXIO;
if ((1 << func) & dimm_fail_cmd_flags[i])
return -EIO;
switch (func) {
case ND_CMD_GET_CONFIG_SIZE:
rc = nfit_test_cmd_get_config_size(buf, buf_len);
......@@ -428,6 +436,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
break;
case ND_CMD_SMART_THRESHOLD:
rc = nfit_test_cmd_smart_threshold(buf, buf_len);
device_lock(&t->pdev.dev);
__acpi_nvdimm_notify(t->dimm_dev[i], 0x81);
device_unlock(&t->pdev.dev);
break;
default:
return -ENOTTY;
......@@ -467,14 +478,12 @@ static struct nfit_test *instances[NUM_NFITS];
static void release_nfit_res(void *data)
{
struct nfit_test_resource *nfit_res = data;
struct resource *res = nfit_res->res;
spin_lock(&nfit_test_lock);
list_del(&nfit_res->list);
spin_unlock(&nfit_test_lock);
vfree(nfit_res->buf);
kfree(res);
kfree(nfit_res);
}
......@@ -482,12 +491,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
void *buf)
{
struct device *dev = &t->pdev.dev;
struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL);
struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res),
GFP_KERNEL);
int rc;
if (!res || !buf || !nfit_res)
if (!buf || !nfit_res)
goto err;
rc = devm_add_action(dev, release_nfit_res, nfit_res);
if (rc)
......@@ -496,10 +504,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
memset(buf, 0, size);
nfit_res->dev = dev;
nfit_res->buf = buf;
nfit_res->res = res;
res->start = *dma;
res->end = *dma + size - 1;
res->name = "NFIT";
nfit_res->res.start = *dma;
nfit_res->res.end = *dma + size - 1;
nfit_res->res.name = "NFIT";
spin_lock_init(&nfit_res->lock);
INIT_LIST_HEAD(&nfit_res->requests);
spin_lock(&nfit_test_lock);
list_add(&nfit_res->list, &t->resources);
spin_unlock(&nfit_test_lock);
......@@ -508,7 +517,6 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
err:
if (buf)
vfree(buf);
kfree(res);
kfree(nfit_res);
return NULL;
}
......@@ -533,13 +541,13 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
continue;
spin_lock(&nfit_test_lock);
list_for_each_entry(n, &t->resources, list) {
if (addr >= n->res->start && (addr < n->res->start
+ resource_size(n->res))) {
if (addr >= n->res.start && (addr < n->res.start
+ resource_size(&n->res))) {
nfit_res = n;
break;
} else if (addr >= (unsigned long) n->buf
&& (addr < (unsigned long) n->buf
+ resource_size(n->res))) {
+ resource_size(&n->res))) {
nfit_res = n;
break;
}
......@@ -564,6 +572,86 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state)
return 0;
}
/*
 * Release action: unregister every test dimm device recorded in the
 * NUM_DCR-entry array passed as @data.
 *
 * Slots are filled from device_create_with_groups(), which reports
 * failure with an ERR_PTR() value rather than NULL, so both empty and
 * error-valued slots are skipped.
 */
static void put_dimms(void *data)
{
	struct device **dimm_dev = data;
	int i;

	for (i = 0; i < NUM_DCR; i++) {
		if (IS_ERR_OR_NULL(dimm_dev[i]))
			continue;
		device_unregister(dimm_dev[i]);
	}
}
static struct class *nfit_test_dimm;
/*
 * Parse the index out of a device named "test_dimm<N>".
 *
 * Return: the index when the name parses and 0 <= N < NUM_DCR,
 * -ENXIO otherwise.
 */
static int dimm_name_to_id(struct device *dev)
{
	int id;

	if (sscanf(dev_name(dev), "test_dimm%d", &id) != 1)
		return -ENXIO;

	if (id < 0 || id >= NUM_DCR)
		return -ENXIO;

	return id;
}
/*
 * sysfs "handle" attribute: print the handle[] entry for this test dimm
 * in hexadecimal, newline terminated like the "fail_cmd" attribute.
 *
 * Return: number of bytes written to @buf, or a negative errno when the
 * device name does not map to a valid dimm index.
 */
static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	int dimm = dimm_name_to_id(dev);

	if (dimm < 0)
		return dimm;

	return sprintf(buf, "%#x\n", handle[dimm]);
}
DEVICE_ATTR_RO(handle);
/*
 * sysfs "fail_cmd" read side: print this dimm's dimm_fail_cmd_flags[]
 * entry in hexadecimal.
 *
 * Return: number of bytes written to @buf, or the negative errno from
 * dimm_name_to_id() when the device name is not a valid test dimm.
 */
static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	const int id = dimm_name_to_id(dev);

	return id < 0 ? id : sprintf(buf, "%#lx\n", dimm_fail_cmd_flags[id]);
}
/*
 * sysfs "fail_cmd" write side: parse @buf as an unsigned bitmask and
 * store it in this dimm's dimm_fail_cmd_flags[] entry.  nfit_test_ctl()
 * fails a command with -EIO when bit (1 << func) is set in that mask.
 *
 * The value is parsed with kstrtoul() so that the parser's output type
 * matches the unsigned long it is stored in.
 *
 * Return: @size on success, or a negative errno when the device name is
 * not a valid test dimm or @buf does not parse.
 */
static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t size)
{
	int dimm = dimm_name_to_id(dev);
	unsigned long val;
	int rc;

	if (dimm < 0)
		return dimm;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return rc;

	dimm_fail_cmd_flags[dimm] = val;
	return size;
}
static DEVICE_ATTR_RW(fail_cmd);
/* sysfs attributes exposed by each test dimm device; NULL terminated */
static struct attribute *nfit_test_dimm_attributes[] = {
&dev_attr_fail_cmd.attr,
&dev_attr_handle.attr,
NULL,
};
/* single unnamed group wrapping the attributes above */
static struct attribute_group nfit_test_dimm_attribute_group = {
.attrs = nfit_test_dimm_attributes,
};
/* NULL-terminated group list handed to device_create_with_groups() */
static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
&nfit_test_dimm_attribute_group,
NULL,
};
static int nfit_test0_alloc(struct nfit_test *t)
{
size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
......@@ -616,6 +704,21 @@ static int nfit_test0_alloc(struct nfit_test *t)
return -ENOMEM;
}
t->_fit = test_alloc(t, sizeof(union acpi_object **), &t->_fit_dma);
if (!t->_fit)
return -ENOMEM;
if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev))
return -ENOMEM;
for (i = 0; i < NUM_DCR; i++) {
t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
&t->pdev.dev, 0, NULL,
nfit_test_dimm_attribute_groups,
"test_dimm%d", i);
if (!t->dimm_dev[i])
return -ENOMEM;
}
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
......@@ -1409,6 +1512,8 @@ static int nfit_test_probe(struct platform_device *pdev)
struct acpi_nfit_desc *acpi_desc;
struct device *dev = &pdev->dev;
struct nfit_test *nfit_test;
struct nfit_mem *nfit_mem;
union acpi_object *obj;
int rc;
nfit_test = to_nfit_test(&pdev->dev);
......@@ -1476,14 +1581,30 @@ static int nfit_test_probe(struct platform_device *pdev)
if (nfit_test->setup != nfit_test0_setup)
return 0;
flush_work(&acpi_desc->work);
nfit_test->setup_hotplug = 1;
nfit_test->setup(nfit_test);
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
nfit_test->nfit_size);
if (rc)
return rc;
obj = kzalloc(sizeof(*obj), GFP_KERNEL);
if (!obj)
return -ENOMEM;
obj->type = ACPI_TYPE_BUFFER;
obj->buffer.length = nfit_test->nfit_size;
obj->buffer.pointer = nfit_test->nfit_buf;
*(nfit_test->_fit) = obj;
__acpi_nfit_notify(&pdev->dev, nfit_test, 0x80);
/* associate dimm devices with nfit_mem data for notification testing */
mutex_lock(&acpi_desc->init_mutex);
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle;
int i;
for (i = 0; i < NUM_DCR; i++)
if (nfit_handle == handle[i])
dev_set_drvdata(nfit_test->dimm_dev[i],
nfit_mem);
}
mutex_unlock(&acpi_desc->init_mutex);
return 0;
}
......@@ -1518,6 +1639,10 @@ static __init int nfit_test_init(void)
{
int rc, i;
nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
if (IS_ERR(nfit_test_dimm))
return PTR_ERR(nfit_test_dimm);
nfit_test_setup(nfit_test_lookup);
for (i = 0; i < NUM_NFITS; i++) {
......@@ -1584,6 +1709,7 @@ static __exit void nfit_test_exit(void)
for (i = 0; i < NUM_NFITS; i++)
platform_device_unregister(&instances[i]->pdev);
nfit_test_teardown();
class_destroy(nfit_test_dimm);
}
module_init(nfit_test_init);
......
......@@ -13,11 +13,21 @@
#ifndef __NFIT_TEST_H__
#define __NFIT_TEST_H__
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/spinlock_types.h>
/*
 * One outstanding region request made against a test resource.
 */
struct nfit_test_request {
/* entry on the owning nfit_test_resource's ->requests list */
struct list_head list;
/* range, name and flags recorded for the request */
struct resource res;
};
struct nfit_test_resource {
struct list_head requests;
struct list_head list;
struct resource *res;
struct resource res;
struct device *dev;
spinlock_t lock;
int req_count;
void *buf;
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment