Commit 5ea6718b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'libnvdimm-for-5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "The bulk of this has been in -next since before the merge window
  opened, with no known collisions / issues reported.

  The only detail worth noting, outside the summary below, is that the
  "libnvdimm-start-pad" topic has been truncated to just cleanups and
  small fixes. The full topic branch would have doubled down on hacks
  around the "section alignment" limitation of the core-mm, instead
  effort is now being spent to address that root issue in the memory
  hotplug implementation for v5.2.

   - Fix nfit-bus command submission regression

   - Support retrieval of short-ARS results if the ARS state is
     "requires continuation", and even if the "no_init_ars" module
     parameter is specified

   - Allow busy-polling of the kernel ARS state by allowing root to
     reset the exponential back-off timer

   - Filter potentially stale ARS results by tracking query-ARS relative
     to the previous start-ARS

   - Enhance dax_device alignment checks

   - Add support for the Hyper-V family of device-specific-methods
     (DSMs)

   - Add several fixes and workarounds for Hyper-V compatibility

   - Fix support to cache the dirty-shutdown-count at init"

* tag 'libnvdimm-for-5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (25 commits)
  libnvdimm/namespace: Clean up holder_class_store()
  libnvdimm/of_pmem: Fix platform_no_drv_owner.cocci warnings
  acpi/nfit: Update NFIT flags error message
  libnvdimm/btt: Fix LBA masking during 'free list' population
  libnvdimm/btt: Remove unnecessary code in btt_freelist_init
  libnvdimm/pfn: Remove dax_label_reserve
  dax: Check the end of the block-device capacity with dax_direct_access()
  nfit/ars: Avoid stale ARS results
  nfit/ars: Allow root to busy-poll the ARS state machine
  nfit/ars: Introduce scrub_flags
  nfit/ars: Remove ars_start_flags
  nfit/ars: Attempt short-ARS even in the no_init_ars case
  nfit/ars: Attempt a short-ARS whenever the ARS state is idle at boot
  acpi/nfit: Require opt-in for read-only label configurations
  libnvdimm/pmem: Honor force_raw for legacy pmem regions
  libnvdimm/pfn: Account for PAGE_SIZE > info-block-size in nd_pfn_init()
  libnvdimm: Fix altmap reservation size calculation
  libnvdimm, pfn: Fix over-trim in trim_pfn_device()
  acpi/nfit: Fix bus command validation
  libnvdimm/dimm: Add a no-BLK quirk based on NVDIMM family
  ...
parents 3bb0f28d 4083014e
......@@ -4643,10 +4643,11 @@ S: Maintained
F: drivers/i2c/busses/i2c-diolan-u2c.c
FILESYSTEM DIRECT ACCESS (DAX)
M: Matthew Wilcox <willy@infradead.org>
M: Ross Zwisler <zwisler@kernel.org>
M: Jan Kara <jack@suse.cz>
M: Dan Williams <dan.j.williams@intel.com>
R: Matthew Wilcox <willy@infradead.org>
R: Jan Kara <jack@suse.cz>
L: linux-fsdevel@vger.kernel.org
L: linux-nvdimm@lists.01.org
S: Supported
F: fs/dax.c
F: include/linux/dax.h
......@@ -4654,9 +4655,9 @@ F: include/trace/events/fs_dax.h
DEVICE DIRECT ACCESS (DAX)
M: Dan Williams <dan.j.williams@intel.com>
M: Dave Jiang <dave.jiang@intel.com>
M: Ross Zwisler <zwisler@kernel.org>
M: Vishal Verma <vishal.l.verma@intel.com>
M: Keith Busch <keith.busch@intel.com>
M: Dave Jiang <dave.jiang@intel.com>
L: linux-nvdimm@lists.01.org
S: Supported
F: drivers/dax/
......@@ -8812,7 +8813,6 @@ S: Maintained
F: tools/lib/lockdep/
LIBNVDIMM BLK: MMIO-APERTURE DRIVER
M: Ross Zwisler <zwisler@kernel.org>
M: Dan Williams <dan.j.williams@intel.com>
M: Vishal Verma <vishal.l.verma@intel.com>
M: Dave Jiang <dave.jiang@intel.com>
......@@ -8825,7 +8825,6 @@ F: drivers/nvdimm/region_devs.c
LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
M: Vishal Verma <vishal.l.verma@intel.com>
M: Dan Williams <dan.j.williams@intel.com>
M: Ross Zwisler <zwisler@kernel.org>
M: Dave Jiang <dave.jiang@intel.com>
L: linux-nvdimm@lists.01.org
Q: https://patchwork.kernel.org/project/linux-nvdimm/list/
......@@ -8833,7 +8832,6 @@ S: Supported
F: drivers/nvdimm/btt*
LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER
M: Ross Zwisler <zwisler@kernel.org>
M: Dan Williams <dan.j.williams@intel.com>
M: Vishal Verma <vishal.l.verma@intel.com>
M: Dave Jiang <dave.jiang@intel.com>
......@@ -8852,9 +8850,10 @@ F: Documentation/devicetree/bindings/pmem/pmem-region.txt
LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
M: Dan Williams <dan.j.williams@intel.com>
M: Ross Zwisler <zwisler@kernel.org>
M: Vishal Verma <vishal.l.verma@intel.com>
M: Dave Jiang <dave.jiang@intel.com>
M: Keith Busch <keith.busch@intel.com>
M: Ira Weiny <ira.weiny@intel.com>
L: linux-nvdimm@lists.01.org
Q: https://patchwork.kernel.org/project/linux-nvdimm/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git
......
......@@ -55,6 +55,10 @@ static bool no_init_ars;
module_param(no_init_ars, bool, 0644);
MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time");
static bool force_labels;
module_param(force_labels, bool, 0444);
MODULE_PARM_DESC(force_labels, "Opt-in to labels despite missing methods");
LIST_HEAD(acpi_descs);
DEFINE_MUTEX(acpi_desc_lock);
......@@ -415,7 +419,7 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd,
if (call_pkg) {
int i;
if (nfit_mem->family != call_pkg->nd_family)
if (nfit_mem && nfit_mem->family != call_pkg->nd_family)
return -ENOTTY;
for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++)
......@@ -424,6 +428,10 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd,
return call_pkg->nd_command;
}
/* In the !call_pkg case, bus commands == bus functions */
if (!nfit_mem)
return cmd;
/* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */
if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
return cmd;
......@@ -454,17 +462,18 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
if (cmd_rc)
*cmd_rc = -EINVAL;
if (cmd == ND_CMD_CALL)
call_pkg = buf;
func = cmd_to_func(nfit_mem, cmd, call_pkg);
if (func < 0)
return func;
if (nvdimm) {
struct acpi_device *adev = nfit_mem->adev;
if (!adev)
return -ENOTTY;
if (cmd == ND_CMD_CALL)
call_pkg = buf;
func = cmd_to_func(nfit_mem, cmd, call_pkg);
if (func < 0)
return func;
dimm_name = nvdimm_name(nvdimm);
cmd_name = nvdimm_cmd_name(cmd);
cmd_mask = nvdimm_cmd_mask(nvdimm);
......@@ -475,11 +484,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
} else {
struct acpi_device *adev = to_acpi_dev(acpi_desc);
func = cmd;
cmd_name = nvdimm_bus_cmd_name(cmd);
cmd_mask = nd_desc->cmd_mask;
dsm_mask = cmd_mask;
if (cmd == ND_CMD_CALL)
dsm_mask = nd_desc->bus_dsm_mask;
desc = nd_cmd_bus_desc(cmd);
guid = to_nfit_uuid(NFIT_DEV_BUS);
......@@ -554,6 +560,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
return -EINVAL;
}
if (out_obj->type != ACPI_TYPE_BUFFER) {
dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
dimm_name, cmd_name, out_obj->type);
rc = -EINVAL;
goto out;
}
if (call_pkg) {
call_pkg->nd_fw_size = out_obj->buffer.length;
memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
......@@ -572,13 +585,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
return 0;
}
if (out_obj->package.type != ACPI_TYPE_BUFFER) {
dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
dimm_name, cmd_name, out_obj->type);
rc = -EINVAL;
goto out;
}
dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
cmd_name, out_obj->buffer.length);
print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
......@@ -1317,19 +1323,30 @@ static ssize_t scrub_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nvdimm_bus_descriptor *nd_desc;
struct acpi_nfit_desc *acpi_desc;
ssize_t rc = -ENXIO;
bool busy;
device_lock(dev);
nd_desc = dev_get_drvdata(dev);
if (nd_desc) {
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
if (!nd_desc) {
device_unlock(dev);
return rc;
}
acpi_desc = to_acpi_desc(nd_desc);
mutex_lock(&acpi_desc->init_mutex);
rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
acpi_desc->scrub_busy
&& !acpi_desc->cancel ? "+\n" : "\n");
mutex_unlock(&acpi_desc->init_mutex);
busy = test_bit(ARS_BUSY, &acpi_desc->scrub_flags)
&& !test_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, busy ? "+\n" : "\n");
/* Allow an admin to poll the busy state at a higher rate */
if (busy && capable(CAP_SYS_RAWIO) && !test_and_set_bit(ARS_POLL,
&acpi_desc->scrub_flags)) {
acpi_desc->scrub_tmo = 1;
mod_delayed_work(nfit_wq, &acpi_desc->dwork, HZ);
}
mutex_unlock(&acpi_desc->init_mutex);
device_unlock(dev);
return rc;
}
......@@ -1759,14 +1776,14 @@ static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
__weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
{
struct device *dev = &nfit_mem->adev->dev;
struct nd_intel_smart smart = { 0 };
union acpi_object in_buf = {
.type = ACPI_TYPE_BUFFER,
.buffer.pointer = (char *) &smart,
.buffer.length = sizeof(smart),
.buffer.type = ACPI_TYPE_BUFFER,
.buffer.length = 0,
};
union acpi_object in_obj = {
.type = ACPI_TYPE_PACKAGE,
.package.type = ACPI_TYPE_PACKAGE,
.package.count = 1,
.package.elements = &in_buf,
};
......@@ -1781,8 +1798,15 @@ __weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
return;
out_obj = acpi_evaluate_dsm(handle, guid, revid, func, &in_obj);
if (!out_obj)
if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER
|| out_obj->buffer.length < sizeof(smart)) {
dev_dbg(dev->parent, "%s: failed to retrieve initial health\n",
dev_name(dev));
ACPI_FREE(out_obj);
return;
}
memcpy(&smart, out_obj->buffer.pointer, sizeof(smart));
ACPI_FREE(out_obj);
if (smart.flags & ND_INTEL_SMART_SHUTDOWN_VALID) {
if (smart.shutdown_state)
......@@ -1793,7 +1817,6 @@ __weak void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem)
set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags);
nfit_mem->dirty_shutdown = smart.shutdown_count;
}
ACPI_FREE(out_obj);
}
static void populate_shutdown_status(struct nfit_mem *nfit_mem)
......@@ -1861,9 +1884,17 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
dev_set_drvdata(&adev_dimm->dev, nfit_mem);
/*
* Until standardization materializes we need to consider 4
* different command sets. Note, that checking for function0 (bit0)
* tells us if any commands are reachable through this GUID.
* There are 4 "legacy" NVDIMM command sets
* (NVDIMM_FAMILY_{INTEL,MSFT,HPE1,HPE2}) that were created before
* an EFI working group was established to constrain this
* proliferation. The nfit driver probes for the supported command
* set by GUID. Note, if you're a platform developer looking to add
* a new command set to this probe, consider using an existing set,
* or otherwise seek approval to publish the command set at
* http://www.uefi.org/RFIC_LIST.
*
* Note, that checking for function0 (bit0) tells us if any commands
* are reachable through this GUID.
*/
for (i = 0; i <= NVDIMM_FAMILY_MAX; i++)
if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
......@@ -1886,6 +1917,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
dsm_mask &= ~(1 << 8);
} else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
dsm_mask = 0xffffffff;
} else if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) {
dsm_mask = 0x1f;
} else {
dev_dbg(dev, "unknown dimm command family\n");
nfit_mem->family = -1;
......@@ -1915,8 +1948,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
| 1 << ND_CMD_SET_CONFIG_DATA;
if (family == NVDIMM_FAMILY_INTEL
&& (dsm_mask & label_mask) == label_mask)
return 0;
/* skip _LS{I,R,W} enabling */;
else {
if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
&& acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
......@@ -1929,6 +1962,20 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
set_bit(NFIT_MEM_LSW, &nfit_mem->flags);
}
/*
* Quirk read-only label configurations to preserve
* access to label-less namespaces by default.
*/
if (!test_bit(NFIT_MEM_LSW, &nfit_mem->flags)
&& !force_labels) {
dev_dbg(dev, "%s: No _LSW, disable labels\n",
dev_name(&adev_dimm->dev));
clear_bit(NFIT_MEM_LSR, &nfit_mem->flags);
} else
dev_dbg(dev, "%s: Force enable labels\n",
dev_name(&adev_dimm->dev));
}
populate_shutdown_status(nfit_mem);
return 0;
......@@ -2027,6 +2074,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
}
/* Quirk to ignore LOCAL for labels on HYPERV DIMMs */
if (nfit_mem->family == NVDIMM_FAMILY_HYPERV)
set_bit(NDD_NOBLK, &flags);
if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
......@@ -2050,7 +2101,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
continue;
dev_info(acpi_desc->dev, "%s flags:%s%s%s%s%s\n",
dev_err(acpi_desc->dev, "Error found in NVDIMM %s flags:%s%s%s%s%s\n",
nvdimm_name(nvdimm),
mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
......@@ -2641,7 +2692,10 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc,
if (rc < 0)
return rc;
if (cmd_rc < 0)
return cmd_rc;
set_bit(ARS_VALID, &acpi_desc->scrub_flags);
return 0;
}
static int ars_continue(struct acpi_nfit_desc *acpi_desc)
......@@ -2651,11 +2705,11 @@ static int ars_continue(struct acpi_nfit_desc *acpi_desc)
struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
memset(&ars_start, 0, sizeof(ars_start));
ars_start.address = ars_status->restart_address;
ars_start.length = ars_status->restart_length;
ars_start.type = ars_status->type;
ars_start.flags = acpi_desc->ars_start_flags;
ars_start = (struct nd_cmd_ars_start) {
.address = ars_status->restart_address,
.length = ars_status->restart_length,
.type = ars_status->type,
};
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
sizeof(ars_start), &cmd_rc);
if (rc < 0)
......@@ -2734,6 +2788,17 @@ static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
*/
if (ars_status->out_length < 44)
return 0;
/*
* Ignore potentially stale results that are only refreshed
* after a start-ARS event.
*/
if (!test_and_clear_bit(ARS_VALID, &acpi_desc->scrub_flags)) {
dev_dbg(acpi_desc->dev, "skip %d stale records\n",
ars_status->num_records);
return 0;
}
for (i = 0; i < ars_status->num_records; i++) {
/* only process full records */
if (ars_status->out_length
......@@ -3004,14 +3069,16 @@ static int ars_register(struct acpi_nfit_desc *acpi_desc,
{
int rc;
if (no_init_ars || test_bit(ARS_FAILED, &nfit_spa->ars_state))
if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
return acpi_nfit_register_region(acpi_desc, nfit_spa);
set_bit(ARS_REQ_SHORT, &nfit_spa->ars_state);
if (!no_init_ars)
set_bit(ARS_REQ_LONG, &nfit_spa->ars_state);
switch (acpi_nfit_query_poison(acpi_desc)) {
case 0:
case -ENOSPC:
case -EAGAIN:
rc = ars_start(acpi_desc, nfit_spa, ARS_REQ_SHORT);
/* shouldn't happen, try again later */
......@@ -3036,7 +3103,6 @@ static int ars_register(struct acpi_nfit_desc *acpi_desc,
break;
case -EBUSY:
case -ENOMEM:
case -ENOSPC:
/*
* BIOS was using ARS, wait for it to complete (or
* resources to become available) and then perform our
......@@ -3071,7 +3137,7 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
lockdep_assert_held(&acpi_desc->init_mutex);
if (acpi_desc->cancel)
if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags))
return 0;
if (query_rc == -EBUSY) {
......@@ -3145,7 +3211,7 @@ static void __sched_ars(struct acpi_nfit_desc *acpi_desc, unsigned int tmo)
{
lockdep_assert_held(&acpi_desc->init_mutex);
acpi_desc->scrub_busy = 1;
set_bit(ARS_BUSY, &acpi_desc->scrub_flags);
/* note this should only be set from within the workqueue */
if (tmo)
acpi_desc->scrub_tmo = tmo;
......@@ -3161,7 +3227,7 @@ static void notify_ars_done(struct acpi_nfit_desc *acpi_desc)
{
lockdep_assert_held(&acpi_desc->init_mutex);
acpi_desc->scrub_busy = 0;
clear_bit(ARS_BUSY, &acpi_desc->scrub_flags);
acpi_desc->scrub_count++;
if (acpi_desc->scrub_count_state)
sysfs_notify_dirent(acpi_desc->scrub_count_state);
......@@ -3182,6 +3248,7 @@ static void acpi_nfit_scrub(struct work_struct *work)
else
notify_ars_done(acpi_desc);
memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
clear_bit(ARS_POLL, &acpi_desc->scrub_flags);
mutex_unlock(&acpi_desc->init_mutex);
}
......@@ -3216,6 +3283,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
struct nfit_spa *nfit_spa;
int rc;
set_bit(ARS_VALID, &acpi_desc->scrub_flags);
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
switch (nfit_spa_type(nfit_spa->spa)) {
case NFIT_SPA_VOLATILE:
......@@ -3450,7 +3518,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
struct nfit_spa *nfit_spa;
mutex_lock(&acpi_desc->init_mutex);
if (acpi_desc->cancel) {
if (test_bit(ARS_CANCEL, &acpi_desc->scrub_flags)) {
mutex_unlock(&acpi_desc->init_mutex);
return 0;
}
......@@ -3529,7 +3597,7 @@ void acpi_nfit_shutdown(void *data)
mutex_unlock(&acpi_desc_lock);
mutex_lock(&acpi_desc->init_mutex);
acpi_desc->cancel = 1;
set_bit(ARS_CANCEL, &acpi_desc->scrub_flags);
cancel_delayed_work_sync(&acpi_desc->dwork);
mutex_unlock(&acpi_desc->init_mutex);
......@@ -3729,6 +3797,7 @@ static __init int nfit_init(void)
guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
guid_parse(UUID_NFIT_DIMM_N_HYPERV, &nfit_uuid[NFIT_DEV_DIMM_N_HYPERV]);
nfit_wq = create_singlethread_workqueue("nfit");
if (!nfit_wq)
......
......@@ -34,11 +34,14 @@
/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
/* http://www.uefi.org/RFIC_LIST (see "Virtual NVDIMM 0x1901") */
#define UUID_NFIT_DIMM_N_HYPERV "5746c5f2-a9a2-4264-ad0e-e4ddc9e09e80"
#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT
#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_HYPERV
#define NVDIMM_STANDARD_CMDMASK \
(1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
......@@ -94,6 +97,7 @@ enum nfit_uuids {
NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
NFIT_DEV_DIMM_N_HYPERV = NVDIMM_FAMILY_HYPERV,
NFIT_SPA_VOLATILE,
NFIT_SPA_PM,
NFIT_SPA_DCR,
......@@ -210,6 +214,13 @@ struct nfit_mem {
int family;
};
enum scrub_flags {
ARS_BUSY,
ARS_CANCEL,
ARS_VALID,
ARS_POLL,
};
struct acpi_nfit_desc {
struct nvdimm_bus_descriptor nd_desc;
struct acpi_table_header acpi_header;
......@@ -223,7 +234,6 @@ struct acpi_nfit_desc {
struct list_head idts;
struct nvdimm_bus *nvdimm_bus;
struct device *dev;
u8 ars_start_flags;
struct nd_cmd_ars_status *ars_status;
struct nfit_spa *scrub_spa;
struct delayed_work dwork;
......@@ -232,8 +242,7 @@ struct acpi_nfit_desc {
unsigned int max_ars;
unsigned int scrub_count;
unsigned int scrub_mode;
unsigned int scrub_busy:1;
unsigned int cancel:1;
unsigned long scrub_flags;
unsigned long dimm_cmd_force_en;
unsigned long bus_cmd_force_en;
unsigned long bus_nfit_cmd_force_en;
......
......@@ -86,12 +86,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
{
struct dax_device *dax_dev;
bool dax_enabled = false;
pgoff_t pgoff, pgoff_end;
struct request_queue *q;
pgoff_t pgoff;
int err, id;
pfn_t pfn;
long len;
char buf[BDEVNAME_SIZE];
void *kaddr, *end_kaddr;
pfn_t pfn, end_pfn;
sector_t last_page;
long len, len2;
int err, id;
if (blocksize != PAGE_SIZE) {
pr_debug("%s: error: unsupported blocksize for dax\n",
......@@ -113,6 +115,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
return false;
}
last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8;
err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
if (err) {
pr_debug("%s: error: unaligned partition for dax\n",
bdevname(bdev, buf));
return false;
}
dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
if (!dax_dev) {
pr_debug("%s: error: device does not support dax\n",
......@@ -121,14 +131,15 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
}
id = dax_read_lock();
len = dax_direct_access(dax_dev, pgoff, 1, NULL, &pfn);
len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
dax_read_unlock(id);
put_dax(dax_dev);
if (len < 1) {
if (len < 1 || len2 < 1) {
pr_debug("%s: error: dax access failed (%ld)\n",
bdevname(bdev, buf), len);
bdevname(bdev, buf), len < 1 ? len : len2);
return false;
}
......@@ -143,13 +154,20 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
*/
WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
dax_enabled = true;
} else if (pfn_t_devmap(pfn)) {
struct dev_pagemap *pgmap;
} else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
struct dev_pagemap *pgmap, *end_pgmap;
pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX)
end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX
&& pfn_t_to_page(pfn)->pgmap == pgmap
&& pfn_t_to_page(end_pfn)->pgmap == pgmap
&& pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
&& pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr)))
dax_enabled = true;
put_dev_pagemap(pgmap);
put_dev_pagemap(end_pgmap);
}
if (!dax_enabled) {
......
......@@ -541,9 +541,9 @@ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
static int btt_freelist_init(struct arena_info *arena)
{
int old, new, ret;
u32 i, map_entry;
struct log_entry log_new, log_old;
int new, ret;
struct log_entry log_new;
u32 i, map_entry, log_oldmap, log_newmap;
arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry),
GFP_KERNEL);
......@@ -551,24 +551,26 @@ static int btt_freelist_init(struct arena_info *arena)
return -ENOMEM;
for (i = 0; i < arena->nfree; i++) {
old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT);
if (old < 0)
return old;
new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT);
if (new < 0)
return new;
/* old and new map entries with any flags stripped out */
log_oldmap = ent_lba(le32_to_cpu(log_new.old_map));
log_newmap = ent_lba(le32_to_cpu(log_new.new_map));
/* sub points to the next one to be overwritten */
arena->freelist[i].sub = 1 - new;
arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
arena->freelist[i].block = le32_to_cpu(log_new.old_map);
arena->freelist[i].block = log_oldmap;
/*
* FIXME: if error clearing fails during init, we want to make
* the BTT read-only
*/
if (ent_e_flag(log_new.old_map)) {
if (ent_e_flag(log_new.old_map) &&
!ent_normal(log_new.old_map)) {
arena->freelist[i].has_err = 1;
ret = arena_clear_freelist_error(arena, i);
if (ret)
dev_err_ratelimited(to_dev(arena),
......@@ -576,7 +578,7 @@ static int btt_freelist_init(struct arena_info *arena)
}
/* This implies a newly created or untouched flog entry */
if (log_new.old_map == log_new.new_map)
if (log_oldmap == log_newmap)
continue;
/* Check if map recovery is needed */
......@@ -584,8 +586,15 @@ static int btt_freelist_init(struct arena_info *arena)
NULL, NULL, 0);
if (ret)
return ret;
if ((le32_to_cpu(log_new.new_map) != map_entry) &&
(le32_to_cpu(log_new.old_map) == map_entry)) {
/*
* The map_entry from btt_read_map is stripped of any flag bits,
* so use the stripped out versions from the log as well for
* testing whether recovery is needed. For restoration, use the
* 'raw' version of the log entries as that captured what we
* were going to write originally.
*/
if ((log_newmap != map_entry) && (log_oldmap == map_entry)) {
/*
* Last transaction wrote the flog, but wasn't able
* to complete the map write. So fix up the map.
......
......@@ -44,6 +44,8 @@
#define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK))
#define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK))
#define set_e_flag(ent) (ent |= MAP_ERR_MASK)
/* 'normal' is both e and z flags set */
#define ent_normal(ent) (ent_e_flag(ent) && ent_z_flag(ent))
enum btt_init_state {
INIT_UNCHECKED = 0,
......
......@@ -159,11 +159,19 @@ static ssize_t size_show(struct device *dev,
}
static DEVICE_ATTR_RO(size);
static ssize_t log_zero_flags_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "Y\n");
}
static DEVICE_ATTR_RO(log_zero_flags);
static struct attribute *nd_btt_attributes[] = {
&dev_attr_sector_size.attr,
&dev_attr_namespace.attr,
&dev_attr_uuid.attr,
&dev_attr_size.attr,
&dev_attr_log_zero_flags.attr,
NULL,
};
......
......@@ -11,6 +11,7 @@
* General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/moduleparam.h>
#include <linux/vmalloc.h>
#include <linux/device.h>
#include <linux/ndctl.h>
......@@ -25,6 +26,10 @@
static DEFINE_IDA(dimm_ida);
static bool noblk;
module_param(noblk, bool, 0444);
MODULE_PARM_DESC(noblk, "force disable BLK / local alias support");
/*
* Retrieve bus and dimm handle and return if this bus supports
* get_config_data commands
......@@ -551,6 +556,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
nvdimm->dimm_id = dimm_id;
nvdimm->provider_data = provider_data;
if (noblk)
flags |= 1 << NDD_NOBLK;
nvdimm->flags = flags;
nvdimm->cmd_mask = cmd_mask;
nvdimm->num_flush = num_flush;
......
......@@ -392,6 +392,7 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
return 0; /* no label, nothing to reserve */
for_each_clear_bit_le(slot, free, nslot) {
struct nvdimm *nvdimm = to_nvdimm(ndd->dev);
struct nd_namespace_label *nd_label;
struct nd_region *nd_region = NULL;
u8 label_uuid[NSLABEL_UUID_LEN];
......@@ -406,6 +407,8 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
flags = __le32_to_cpu(nd_label->flags);
if (test_bit(NDD_NOBLK, &nvdimm->flags))
flags &= ~NSLABEL_FLAG_LOCAL;
nd_label_gen_id(&label_id, label_uuid, flags);
res = nvdimm_allocate_dpa(ndd, &label_id,
__le64_to_cpu(nd_label->dpa),
......@@ -755,7 +758,7 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class,
static int __pmem_label_update(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
int pos)
int pos, unsigned long flags)
{
struct nd_namespace_common *ndns = &nspm->nsio.common;
struct nd_interleave_set *nd_set = nd_region->nd_set;
......@@ -796,7 +799,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
if (nspm->alt_name)
memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN);
nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING);
nd_label->flags = __cpu_to_le32(flags);
nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
nd_label->position = __cpu_to_le16(pos);
nd_label->isetcookie = __cpu_to_le64(cookie);
......@@ -1249,13 +1252,13 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
int nd_pmem_namespace_label_update(struct nd_region *nd_region,
struct nd_namespace_pmem *nspm, resource_size_t size)
{
int i;
int i, rc;
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct resource *res;
int rc, count = 0;
int count = 0;
if (size == 0) {
rc = del_labels(nd_mapping, nspm->uuid);
......@@ -1273,7 +1276,20 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region,
if (rc < 0)
return rc;
rc = __pmem_label_update(nd_region, nd_mapping, nspm, i);
rc = __pmem_label_update(nd_region, nd_mapping, nspm, i,
NSLABEL_FLAG_UPDATING);
if (rc)
return rc;
}
if (size == 0)
return 0;
/* Clear the UPDATING flag per UEFI 2.7 expectations */
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
rc = __pmem_label_update(nd_region, nd_mapping, nspm, i, 0);
if (rc)
return rc;
}
......
......@@ -138,6 +138,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid)
bool pmem_should_map_pages(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
struct nd_namespace_common *ndns = to_ndns(dev);
struct nd_namespace_io *nsio;
if (!IS_ENABLED(CONFIG_ZONE_DEVICE))
......@@ -149,6 +150,9 @@ bool pmem_should_map_pages(struct device *dev)
if (is_nd_pfn(dev) || is_nd_btt(dev))
return false;
if (ndns->force_raw)
return false;
nsio = to_nd_namespace_io(dev);
if (region_intersects(nsio->res.start, resource_size(&nsio->res),
IORESOURCE_SYSTEM_RAM,
......@@ -1506,13 +1510,13 @@ static ssize_t __holder_class_store(struct device *dev, const char *buf)
if (dev->driver || ndns->claim)
return -EBUSY;
if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0)
if (sysfs_streq(buf, "btt"))
ndns->claim_class = btt_claim_class(dev);
else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0)
else if (sysfs_streq(buf, "pfn"))
ndns->claim_class = NVDIMM_CCLASS_PFN;
else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0)
else if (sysfs_streq(buf, "dax"))
ndns->claim_class = NVDIMM_CCLASS_DAX;
else if (strcmp(buf, "") == 0 || strcmp(buf, "\n") == 0)
else if (sysfs_streq(buf, ""))
ndns->claim_class = NVDIMM_CCLASS_NONE;
else
return -EINVAL;
......@@ -2492,6 +2496,12 @@ static int init_active_labels(struct nd_region *nd_region)
if (!label_ent)
break;
label = nd_label_active(ndd, j);
if (test_bit(NDD_NOBLK, &nvdimm->flags)) {
u32 flags = __le32_to_cpu(label->flags);
flags &= ~NSLABEL_FLAG_LOCAL;
label->flags = __cpu_to_le32(flags);
}
label_ent->label = label;
mutex_lock(&nd_mapping->lock);
......
......@@ -108,7 +108,6 @@ static struct platform_driver of_pmem_region_driver = {
.remove = of_pmem_region_remove,
.driver = {
.name = "of_pmem",
.owner = THIS_MODULE,
.of_match_table = of_pmem_region_match,
},
};
......
......@@ -580,6 +580,11 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
}
EXPORT_SYMBOL(nd_pfn_probe);
static u32 info_block_reserve(void)
{
return ALIGN(SZ_8K, PAGE_SIZE);
}
/*
* We hotplug memory at section granularity, pad the reserved area from
* the previous section base to the namespace base address.
......@@ -593,7 +598,7 @@ static unsigned long init_altmap_base(resource_size_t base)
static unsigned long init_altmap_reserve(resource_size_t base)
{
unsigned long reserve = PHYS_PFN(SZ_8K);
unsigned long reserve = info_block_reserve() >> PAGE_SHIFT;
unsigned long base_pfn = PHYS_PFN(base);
reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
......@@ -608,6 +613,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
u64 offset = le64_to_cpu(pfn_sb->dataoff);
u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
u32 reserve = info_block_reserve();
struct nd_namespace_common *ndns = nd_pfn->ndns;
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
resource_size_t base = nsio->res.start + start_pad;
......@@ -621,7 +627,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
res->end -= end_trunc;
if (nd_pfn->mode == PFN_MODE_RAM) {
if (offset < SZ_8K)
if (offset < reserve)
return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
pgmap->altmap_valid = false;
......@@ -634,7 +640,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
le64_to_cpu(nd_pfn->pfn_sb->npfns),
nd_pfn->npfns);
memcpy(altmap, &__altmap, sizeof(*altmap));
altmap->free = PHYS_PFN(offset - SZ_8K);
altmap->free = PHYS_PFN(offset - reserve);
altmap->alloc = 0;
pgmap->altmap_valid = true;
} else
......@@ -678,18 +684,17 @@ static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trun
if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
IORES_DESC_NONE) == REGION_MIXED
|| !IS_ALIGNED(end, nd_pfn->align)
|| nd_region_conflict(nd_region, start, size + adjust))
|| nd_region_conflict(nd_region, start, size))
*end_trunc = end - phys_pmem_align_down(nd_pfn, end);
}
static int nd_pfn_init(struct nd_pfn *nd_pfn)
{
u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
struct nd_namespace_common *ndns = nd_pfn->ndns;
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
u32 start_pad, end_trunc, reserve = info_block_reserve();
resource_size_t start, size;
struct nd_region *nd_region;
u32 start_pad, end_trunc;
struct nd_pfn_sb *pfn_sb;
unsigned long npfns;
phys_addr_t offset;
......@@ -734,7 +739,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
*/
start = nsio->res.start + start_pad;
size = resource_size(&nsio->res);
npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K)
npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - reserve)
/ PAGE_SIZE);
if (nd_pfn->mode == PFN_MODE_PMEM) {
/*
......@@ -742,11 +747,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
* when populating the vmemmap. This *should* be equal to
* PMD_SIZE for most architectures.
*/
offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve,
offset = ALIGN(start + reserve + 64 * npfns,
max(nd_pfn->align, PMD_SIZE)) - start;
} else if (nd_pfn->mode == PFN_MODE_RAM)
offset = ALIGN(start + SZ_8K + dax_label_reserve,
nd_pfn->align) - start;
offset = ALIGN(start + reserve, nd_pfn->align) - start;
else
return -ENXIO;
......
......@@ -1003,6 +1003,13 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
if (test_bit(NDD_UNARMED, &nvdimm->flags))
ro = 1;
if (test_bit(NDD_NOBLK, &nvdimm->flags)
&& dev_type == &nd_blk_device_type) {
dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n",
caller, dev_name(&nvdimm->dev), i);
return NULL;
}
}
if (dev_type == &nd_blk_device_type) {
......
......@@ -42,6 +42,8 @@ enum {
NDD_SECURITY_OVERWRITE = 3,
/* tracking whether or not there is a pending device reference */
NDD_WORK_PENDING = 4,
/* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */
NDD_NOBLK = 5,
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
......
......@@ -243,6 +243,7 @@ struct nd_cmd_pkg {
#define NVDIMM_FAMILY_HPE1 1
#define NVDIMM_FAMILY_HPE2 2
#define NVDIMM_FAMILY_MSFT 3
#define NVDIMM_FAMILY_HYPERV 4
#define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\
struct nd_cmd_pkg)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment