Commit 36092ee8 authored by Dan Williams's avatar Dan Williams

Merge branch 'for-4.7/dax' into libnvdimm-for-next

parents 1b982baf 03dca343
...@@ -407,35 +407,6 @@ static inline int is_unrecognized_ioctl(int ret) ...@@ -407,35 +407,6 @@ static inline int is_unrecognized_ioctl(int ret)
ret == -ENOIOCTLCMD; ret == -ENOIOCTLCMD;
} }
#ifdef CONFIG_FS_DAX
bool blkdev_dax_capable(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
if (!disk->fops->direct_access)
return false;
/*
* If the partition is not aligned on a page boundary, we can't
* do dax I/O to it.
*/
if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
return false;
/*
* If the device has known bad blocks, force all I/O through the
* driver / page cache.
*
* TODO: support finer grained dax error handling
*/
if (disk->bb && disk->bb->count)
return false;
return true;
}
#endif
static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
unsigned cmd, unsigned long arg) unsigned cmd, unsigned long arg)
{ {
...@@ -598,9 +569,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, ...@@ -598,9 +569,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKTRACESETUP: case BLKTRACESETUP:
case BLKTRACETEARDOWN: case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, argp); return blk_trace_ioctl(bdev, cmd, argp);
case BLKDAXGET:
return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
break;
case IOC_PR_REGISTER: case IOC_PR_REGISTER:
return blkdev_pr_register(bdev, argp); return blkdev_pr_register(bdev, argp);
case IOC_PR_RESERVE: case IOC_PR_RESERVE:
......
...@@ -190,6 +190,8 @@ source "drivers/android/Kconfig" ...@@ -190,6 +190,8 @@ source "drivers/android/Kconfig"
source "drivers/nvdimm/Kconfig" source "drivers/nvdimm/Kconfig"
source "drivers/dax/Kconfig"
source "drivers/nvmem/Kconfig" source "drivers/nvmem/Kconfig"
source "drivers/hwtracing/stm/Kconfig" source "drivers/hwtracing/stm/Kconfig"
......
...@@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT) += parport/ ...@@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/ obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/ obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DEV_DAX) += dax/
obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
obj-$(CONFIG_NUBUS) += nubus/ obj-$(CONFIG_NUBUS) += nubus/
obj-y += macintosh/ obj-y += macintosh/
......
menuconfig DEV_DAX
tristate "DAX: direct access to differentiated memory"
default m if NVDIMM_DAX
depends on TRANSPARENT_HUGEPAGE
help
Support raw access to differentiated (persistence, bandwidth,
latency...) memory via an mmap(2) capable character
device. Platform firmware or a device driver may identify a
platform memory resource that is differentiated from the
baseline memory pool. Mappings of a /dev/daxX.Y device impose
restrictions that make the mapping behavior deterministic.
if DEV_DAX
config DEV_DAX_PMEM
tristate "PMEM DAX: direct access to persistent memory"
depends on NVDIMM_DAX
default DEV_DAX
help
Support raw access to persistent memory. Note that this
driver consumes memory ranges allocated and exported by the
libnvdimm sub-system.
Say Y if unsure
endif
obj-$(CONFIG_DEV_DAX) += dax.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
dax_pmem-y := pmem.o
This diff is collapsed.
/*
* Copyright(c) 2016 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef __DAX_H__
#define __DAX_H__
/* Forward declarations: this header only ever handles these by pointer. */
struct device;
struct resource;
struct dax_region;
/*
 * Drop a hold on @dax_region.  The pmem driver calls this once its child
 * dax devices have been created.
 * NOTE(review): presumably reference counted - confirm in dax.c.
 */
void dax_region_put(struct dax_region *dax_region);
/*
 * Create a dax region on behalf of @parent.  The pmem driver treats a NULL
 * return as an allocation failure (-ENOMEM) and passes the mapped kernel
 * address as @addr and pfn_t flags as @flags.
 */
struct dax_region *alloc_dax_region(struct device *parent,
int region_id, struct resource *res, unsigned int align,
void *addr, unsigned long flags);
/*
 * Create dax device(s) inside @dax_region covering @res.  Returns an int
 * status that the pmem driver propagates as its probe result.
 * NOTE(review): the devm_ prefix suggests device-managed teardown - confirm.
 */
int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
int count);
#endif /* __DAX_H__ */
/*
* Copyright(c) 2016 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/percpu-refcount.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include "../nvdimm/pfn.h"
#include "../nvdimm/nd.h"
#include "dax.h"
/*
 * Per-device driver state for a dax_pmem instance, allocated with
 * devm_kzalloc() in dax_pmem_probe().
 */
struct dax_pmem {
/* device being probed; used as the dev_dbg() target in the ref callbacks */
struct device *dev;
/* reference count handed to devm_memremap_pages() for the mapped range */
struct percpu_ref ref;
/* completed by dax_pmem_percpu_release(), waited on at teardown */
struct completion cmp;
};
/*
 * to_dax_pmem() - map an embedded percpu_ref back to its dax_pmem
 * @ref: pointer to the 'ref' member of a struct dax_pmem
 *
 * Only used by the percpu_ref callbacks in this file and not declared in
 * any header, so it is given internal linkage to keep the symbol out of
 * the global namespace.
 *
 * Returns the struct dax_pmem that contains @ref.
 */
static struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
{
	return container_of(ref, struct dax_pmem, ref);
}
static void dax_pmem_percpu_release(struct percpu_ref *ref)
{
struct dax_pmem *dax_pmem = to_dax_pmem(ref);
dev_dbg(dax_pmem->dev, "%s\n", __func__);
complete(&dax_pmem->cmp);
}
/*
 * dax_pmem_percpu_exit() - devm action that tears down the percpu_ref
 * @data: the struct percpu_ref embedded in a struct dax_pmem
 *
 * The ref may only be exited once it is no longer in active use, i.e.
 * after its release callback (dax_pmem_percpu_release()) has run and
 * signalled ->cmp.  Wait for that completion first and only then call
 * percpu_ref_exit(); exiting before the wait would free the ref's
 * state while references may still be outstanding.
 */
static void dax_pmem_percpu_exit(void *data)
{
	struct percpu_ref *ref = data;
	struct dax_pmem *dax_pmem = to_dax_pmem(ref);

	dev_dbg(dax_pmem->dev, "%s\n", __func__);
	/* block until the release callback has fired... */
	wait_for_completion(&dax_pmem->cmp);
	/* ...then it is safe to undo percpu_ref_init() */
	percpu_ref_exit(ref);
}
static void dax_pmem_percpu_kill(void *data)
{
struct percpu_ref *ref = data;
struct dax_pmem *dax_pmem = to_dax_pmem(ref);
dev_dbg(dax_pmem->dev, "%s\n", __func__);
percpu_ref_kill(ref);
}
/*
 * dax_pmem_probe() - bind a libnvdimm dax device and publish a dax region
 * @dev: the nd_dax device being probed
 *
 * Steps, in order:
 *   1. claim the backing namespace and read the info block,
 *   2. reserve the namespace's physical range,
 *   3. set up a percpu_ref and map the range with devm_memremap_pages(),
 *   4. allocate a dax region and create one dax device covering it.
 *
 * Returns 0 on success or a negative errno.  Everything acquired through
 * devm_* helpers is left to the devres unwind on failure.
 */
static int dax_pmem_probe(struct device *dev)
{
	int rc;
	void *addr;
	struct resource res;
	struct nd_pfn_sb *pfn_sb;
	struct dax_pmem *dax_pmem;
	struct nd_region *nd_region;
	struct nd_namespace_io *nsio;
	struct dax_region *dax_region;
	struct nd_namespace_common *ndns;
	struct nd_dax *nd_dax = to_nd_dax(dev);
	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
	struct vmem_altmap __altmap, *altmap = NULL;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);
	nsio = to_nd_namespace_io(&ndns->dev);

	/*
	 * parse the 'pfn' info block via ->rw_bytes
	 *
	 * The namespace must be successfully enabled before the info
	 * block can be read, so do not ignore a failure here.
	 */
	rc = devm_nsio_enable(dev, nsio);
	if (rc)
		return rc;
	altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
	if (IS_ERR(altmap))
		return PTR_ERR(altmap);
	devm_nsio_disable(dev, nsio);

	pfn_sb = nd_pfn->pfn_sb;

	if (!devm_request_mem_region(dev, nsio->res.start,
				resource_size(&nsio->res), dev_name(dev))) {
		dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
		return -EBUSY;
	}

	dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
	if (!dax_pmem)
		return -ENOMEM;

	dax_pmem->dev = dev;
	init_completion(&dax_pmem->cmp);
	rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
			GFP_KERNEL);
	if (rc)
		return rc;

	rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
	if (rc) {
		/*
		 * The ref was initialised but never killed, so its release
		 * callback will not run and ->cmp will never complete.
		 * Undo percpu_ref_init() directly rather than going through
		 * dax_pmem_percpu_exit(), which waits on ->cmp.
		 */
		percpu_ref_exit(&dax_pmem->ref);
		return rc;
	}

	/*
	 * NOTE(review): if the mapping fails, the devres unwind runs
	 * dax_pmem_percpu_exit() although the ref has not been killed;
	 * confirm that path cannot block on ->cmp.
	 */
	addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
	if (IS_ERR(addr))
		return PTR_ERR(addr);

	rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
	if (rc) {
		dax_pmem_percpu_kill(&dax_pmem->ref);
		return rc;
	}

	nd_region = to_nd_region(dev->parent);
	dax_region = alloc_dax_region(dev, nd_region->id, &res,
			le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
	if (!dax_region)
		return -ENOMEM;

	/* TODO: support for subdividing a dax region... */
	rc = devm_create_dax_dev(dax_region, &res, 1);

	/* child dax_dev instances now own the lifetime of the dax_region */
	dax_region_put(dax_region);

	return rc;
}
/*
 * libnvdimm driver description.  Only ->probe() is provided; there is no
 * ->remove() callback, so teardown relies on what probe registered through
 * devm_* helpers.
 */
static struct nd_device_driver dax_pmem_driver = {
.probe = dax_pmem_probe,
.drv = {
.name = "dax_pmem",
},
.type = ND_DRIVER_DAX_PMEM,
};
/* Module entry point: register the driver with the nvdimm bus. */
static int __init dax_pmem_init(void)
{
return nd_driver_register(&dax_pmem_driver);
}
module_init(dax_pmem_init);
/* Module exit point: unregister the embedded struct device_driver. */
static void __exit dax_pmem_exit(void)
{
driver_unregister(&dax_pmem_driver.drv);
}
module_exit(dax_pmem_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
...@@ -124,8 +124,9 @@ static int nvdimm_bus_remove(struct device *dev) ...@@ -124,8 +124,9 @@ static int nvdimm_bus_remove(struct device *dev)
struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
struct module *provider = to_bus_provider(dev); struct module *provider = to_bus_provider(dev);
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
int rc; int rc = 0;
if (nd_drv->remove)
rc = nd_drv->remove(dev); rc = nd_drv->remove(dev);
nd_region_disable(nvdimm_bus, dev); nd_region_disable(nvdimm_bus, dev);
...@@ -296,8 +297,8 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner, ...@@ -296,8 +297,8 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
return -EINVAL; return -EINVAL;
} }
if (!nd_drv->probe || !nd_drv->remove) { if (!nd_drv->probe) {
pr_debug("->probe() and ->remove() must be specified\n"); pr_debug("%s ->probe() must be specified\n", mod_name);
return -EINVAL; return -EINVAL;
} }
......
...@@ -93,6 +93,25 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) ...@@ -93,6 +93,25 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns)
return true; return true;
} }
struct nd_pfn *to_nd_pfn_safe(struct device *dev)
{
/*
* pfn device attributes are re-used by dax device instances, so we
* need to be careful to correct device-to-nd_pfn conversion.
*/
if (is_nd_pfn(dev))
return to_nd_pfn(dev);
if (is_nd_dax(dev)) {
struct nd_dax *nd_dax = to_nd_dax(dev);
return &nd_dax->nd_pfn;
}
WARN_ON(1);
return NULL;
}
static void nd_detach_and_reset(struct device *dev, static void nd_detach_and_reset(struct device *dev,
struct nd_namespace_common **_ndns) struct nd_namespace_common **_ndns)
{ {
...@@ -106,8 +125,8 @@ static void nd_detach_and_reset(struct device *dev, ...@@ -106,8 +125,8 @@ static void nd_detach_and_reset(struct device *dev,
nd_btt->lbasize = 0; nd_btt->lbasize = 0;
kfree(nd_btt->uuid); kfree(nd_btt->uuid);
nd_btt->uuid = NULL; nd_btt->uuid = NULL;
} else if (is_nd_pfn(dev)) { } else if (is_nd_pfn(dev) || is_nd_dax(dev)) {
struct nd_pfn *nd_pfn = to_nd_pfn(dev); struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
kfree(nd_pfn->uuid); kfree(nd_pfn->uuid);
nd_pfn->uuid = NULL; nd_pfn->uuid = NULL;
......
...@@ -648,6 +648,9 @@ static __exit void libnvdimm_exit(void) ...@@ -648,6 +648,9 @@ static __exit void libnvdimm_exit(void)
nd_region_exit(); nd_region_exit();
nvdimm_exit(); nvdimm_exit();
nvdimm_bus_exit(); nvdimm_bus_exit();
nd_region_devs_exit();
nvdimm_devs_exit();
ida_destroy(&nd_ida);
} }
MODULE_LICENSE("GPL v2"); MODULE_LICENSE("GPL v2");
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/mm.h> #include <linux/mm.h>
#include "nd-core.h" #include "nd-core.h"
#include "pfn.h"
#include "nd.h" #include "nd.h"
static void nd_dax_release(struct device *dev) static void nd_dax_release(struct device *dev)
...@@ -97,3 +98,37 @@ struct device *nd_dax_create(struct nd_region *nd_region) ...@@ -97,3 +98,37 @@ struct device *nd_dax_create(struct nd_region *nd_region)
__nd_device_register(dev); __nd_device_register(dev);
return dev; return dev;
} }
int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
{
int rc;
struct nd_dax *nd_dax;
struct device *dax_dev;
struct nd_pfn *nd_pfn;
struct nd_pfn_sb *pfn_sb;
struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
if (ndns->force_raw)
return -ENODEV;
nvdimm_bus_lock(&ndns->dev);
nd_dax = nd_dax_alloc(nd_region);
nd_pfn = &nd_dax->nd_pfn;
dax_dev = nd_pfn_devinit(nd_pfn, ndns);
nvdimm_bus_unlock(&ndns->dev);
if (!dax_dev)
return -ENOMEM;
pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
nd_pfn->pfn_sb = pfn_sb;
rc = nd_pfn_validate(nd_pfn, DAX_SIG);
dev_dbg(dev, "%s: dax: %s\n", __func__,
rc == 0 ? dev_name(dax_dev) : "<none>");
if (rc < 0) {
__nd_detach_ndns(dax_dev, &nd_pfn->ndns);
put_device(dax_dev);
} else
__nd_device_register(dax_dev);
return rc;
}
EXPORT_SYMBOL(nd_dax_probe);
...@@ -552,3 +552,8 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) ...@@ -552,3 +552,8 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count);
/*
 * Module-exit helper: destroy dimm_ida.  Called from libnvdimm_exit().
 */
void __exit nvdimm_devs_exit(void)
{
ida_destroy(&dimm_ida);
}
...@@ -49,6 +49,8 @@ bool is_nd_blk(struct device *dev); ...@@ -49,6 +49,8 @@ bool is_nd_blk(struct device *dev);
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
int __init nvdimm_bus_init(void); int __init nvdimm_bus_init(void);
void nvdimm_bus_exit(void); void nvdimm_bus_exit(void);
void nvdimm_devs_exit(void);
void nd_region_devs_exit(void);
void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev); void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
struct nd_region; struct nd_region;
void nd_region_create_blk_seed(struct nd_region *nd_region); void nd_region_create_blk_seed(struct nd_region *nd_region);
...@@ -92,4 +94,5 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, ...@@ -92,4 +94,5 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
ssize_t nd_namespace_store(struct device *dev, ssize_t nd_namespace_store(struct device *dev,
struct nd_namespace_common **_ndns, const char *buf, struct nd_namespace_common **_ndns, const char *buf,
size_t len); size_t len);
struct nd_pfn *to_nd_pfn_safe(struct device *dev);
#endif /* __ND_CORE_H__ */ #endif /* __ND_CORE_H__ */
...@@ -232,7 +232,7 @@ bool is_nd_pfn(struct device *dev); ...@@ -232,7 +232,7 @@ bool is_nd_pfn(struct device *dev);
struct device *nd_pfn_create(struct nd_region *nd_region); struct device *nd_pfn_create(struct nd_region *nd_region);
struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
struct nd_namespace_common *ndns); struct nd_namespace_common *ndns);
int nd_pfn_validate(struct nd_pfn *nd_pfn); int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig);
extern struct attribute_group nd_pfn_attribute_group; extern struct attribute_group nd_pfn_attribute_group;
#else #else
static inline int nd_pfn_probe(struct device *dev, static inline int nd_pfn_probe(struct device *dev,
...@@ -251,7 +251,7 @@ static inline struct device *nd_pfn_create(struct nd_region *nd_region) ...@@ -251,7 +251,7 @@ static inline struct device *nd_pfn_create(struct nd_region *nd_region)
return NULL; return NULL;
} }
static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) static inline int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{ {
return -ENODEV; return -ENODEV;
} }
...@@ -259,9 +259,16 @@ static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) ...@@ -259,9 +259,16 @@ static inline int nd_pfn_validate(struct nd_pfn *nd_pfn)
struct nd_dax *to_nd_dax(struct device *dev); struct nd_dax *to_nd_dax(struct device *dev);
#if IS_ENABLED(CONFIG_NVDIMM_DAX) #if IS_ENABLED(CONFIG_NVDIMM_DAX)
int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns);
bool is_nd_dax(struct device *dev); bool is_nd_dax(struct device *dev);
struct device *nd_dax_create(struct nd_region *nd_region); struct device *nd_dax_create(struct nd_region *nd_region);
#else #else
static inline int nd_dax_probe(struct device *dev,
struct nd_namespace_common *ndns)
{
return -ENODEV;
}
static inline bool is_nd_dax(struct device *dev) static inline bool is_nd_dax(struct device *dev)
{ {
return false; return false;
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#define PFN_SIG_LEN 16 #define PFN_SIG_LEN 16
#define PFN_SIG "NVDIMM_PFN_INFO\0" #define PFN_SIG "NVDIMM_PFN_INFO\0"
#define DAX_SIG "NVDIMM_DAX_INFO\0"
struct nd_pfn_sb { struct nd_pfn_sb {
u8 signature[PFN_SIG_LEN]; u8 signature[PFN_SIG_LEN];
......
...@@ -54,25 +54,6 @@ struct nd_pfn *to_nd_pfn(struct device *dev) ...@@ -54,25 +54,6 @@ struct nd_pfn *to_nd_pfn(struct device *dev)
} }
EXPORT_SYMBOL(to_nd_pfn); EXPORT_SYMBOL(to_nd_pfn);
static struct nd_pfn *to_nd_pfn_safe(struct device *dev)
{
/*
* pfn device attributes are re-used by dax device instances, so we
* need to be careful to correct device-to-nd_pfn conversion.
*/
if (is_nd_pfn(dev))
return to_nd_pfn(dev);
if (is_nd_dax(dev)) {
struct nd_dax *nd_dax = to_nd_dax(dev);
return &nd_dax->nd_pfn;
}
WARN_ON(1);
return NULL;
}
static ssize_t mode_show(struct device *dev, static ssize_t mode_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
...@@ -360,7 +341,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) ...@@ -360,7 +341,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
return dev; return dev;
} }
int nd_pfn_validate(struct nd_pfn *nd_pfn) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{ {
u64 checksum, offset; u64 checksum, offset;
struct nd_namespace_io *nsio; struct nd_namespace_io *nsio;
...@@ -377,7 +358,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) ...@@ -377,7 +358,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb))) if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
return -ENXIO; return -ENXIO;
if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0) if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0)
return -ENODEV; return -ENODEV;
checksum = le64_to_cpu(pfn_sb->checksum); checksum = le64_to_cpu(pfn_sb->checksum);
...@@ -416,6 +397,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) ...@@ -416,6 +397,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
return -ENODEV; return -ENODEV;
} }
if (nd_pfn->align == 0)
nd_pfn->align = le32_to_cpu(pfn_sb->align);
if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) { if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
nd_pfn->align, nvdimm_namespace_capacity(ndns)); nd_pfn->align, nvdimm_namespace_capacity(ndns));
...@@ -436,8 +419,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) ...@@ -436,8 +419,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
return -EBUSY; return -EBUSY;
} }
nd_pfn->align = le32_to_cpu(pfn_sb->align); if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align))
if (!is_power_of_2(offset) || offset < PAGE_SIZE) { || !IS_ALIGNED(offset, PAGE_SIZE)) {
dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n", dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
offset); offset);
return -ENXIO; return -ENXIO;
...@@ -467,7 +450,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) ...@@ -467,7 +450,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
nd_pfn = to_nd_pfn(pfn_dev); nd_pfn = to_nd_pfn(pfn_dev);
nd_pfn->pfn_sb = pfn_sb; nd_pfn->pfn_sb = pfn_sb;
rc = nd_pfn_validate(nd_pfn); rc = nd_pfn_validate(nd_pfn, PFN_SIG);
dev_dbg(dev, "%s: pfn: %s\n", __func__, dev_dbg(dev, "%s: pfn: %s\n", __func__,
rc == 0 ? dev_name(pfn_dev) : "<none>"); rc == 0 ? dev_name(pfn_dev) : "<none>");
if (rc < 0) { if (rc < 0) {
...@@ -552,6 +535,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) ...@@ -552,6 +535,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
struct nd_pfn_sb *pfn_sb; struct nd_pfn_sb *pfn_sb;
unsigned long npfns; unsigned long npfns;
phys_addr_t offset; phys_addr_t offset;
const char *sig;
u64 checksum; u64 checksum;
int rc; int rc;
...@@ -560,7 +544,11 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) ...@@ -560,7 +544,11 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
return -ENOMEM; return -ENOMEM;
nd_pfn->pfn_sb = pfn_sb; nd_pfn->pfn_sb = pfn_sb;
rc = nd_pfn_validate(nd_pfn); if (is_nd_dax(&nd_pfn->dev))
sig = DAX_SIG;
else
sig = PFN_SIG;
rc = nd_pfn_validate(nd_pfn, sig);
if (rc != -ENODEV) if (rc != -ENODEV)
return rc; return rc;
...@@ -635,7 +623,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) ...@@ -635,7 +623,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->dataoff = cpu_to_le64(offset);
pfn_sb->npfns = cpu_to_le64(npfns); pfn_sb->npfns = cpu_to_le64(npfns);
memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN); memcpy(pfn_sb->signature, sig, PFN_SIG_LEN);
memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
pfn_sb->version_major = cpu_to_le16(1); pfn_sb->version_major = cpu_to_le16(1);
......
...@@ -320,7 +320,8 @@ static int nd_pmem_probe(struct device *dev) ...@@ -320,7 +320,8 @@ static int nd_pmem_probe(struct device *dev)
return pmem_attach_disk(dev, ndns); return pmem_attach_disk(dev, ndns);
/* if we find a valid info-block we'll come back as that personality */ /* if we find a valid info-block we'll come back as that personality */
if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0) if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0
|| nd_dax_probe(dev, ndns) == 0)
return -ENXIO; return -ENXIO;
/* ...otherwise we're just a raw pmem device */ /* ...otherwise we're just a raw pmem device */
......
...@@ -793,3 +793,8 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, ...@@ -793,3 +793,8 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
__func__); __func__);
} }
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
/*
 * Module-exit helper: destroy region_ida.  Called from libnvdimm_exit().
 */
void __exit nd_region_devs_exit(void)
{
ida_destroy(&region_ida);
}
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/cleancache.h> #include <linux/cleancache.h>
#include <linux/dax.h> #include <linux/dax.h>
#include <linux/badblocks.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include "internal.h" #include "internal.h"
...@@ -1159,6 +1160,33 @@ void bd_set_size(struct block_device *bdev, loff_t size) ...@@ -1159,6 +1160,33 @@ void bd_set_size(struct block_device *bdev, loff_t size)
} }
EXPORT_SYMBOL(bd_set_size); EXPORT_SYMBOL(bd_set_size);
/*
 * Decide whether dax I/O may be used on @bdev.
 *
 * All of the following must hold: the driver implements
 * ->direct_access() and CONFIG_FS_DAX is enabled, the partition is page
 * aligned in both start and size, and the disk reports no bad blocks.
 */
static bool blkdev_dax_capable(struct block_device *bdev)
{
	struct gendisk *gd = bdev->bd_disk;
	bool misaligned;

	if (!(gd->fops->direct_access && IS_ENABLED(CONFIG_FS_DAX)))
		return false;

	/*
	 * A partition that does not start and end on a page boundary
	 * cannot be used for dax I/O.
	 */
	misaligned = (bdev->bd_part->start_sect % (PAGE_SIZE / 512))
		|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512));
	if (misaligned)
		return false;

	/*
	 * Known bad blocks force all I/O through the driver / page cache.
	 *
	 * TODO: support finer grained dax error handling
	 */
	if (gd->bb && gd->bb->count)
		return false;

	return true;
}
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
/* /*
...@@ -1724,79 +1752,13 @@ static const struct address_space_operations def_blk_aops = { ...@@ -1724,79 +1752,13 @@ static const struct address_space_operations def_blk_aops = {
.is_dirty_writeback = buffer_check_dirty_writeback, .is_dirty_writeback = buffer_check_dirty_writeback,
}; };
#ifdef CONFIG_FS_DAX
/*
* In the raw block case we do not need to contend with truncation nor
* unwritten file extents. Without those concerns there is no need for
* additional locking beyond the mmap_sem context that these routines
* are already executing under.
*
* Note, there is no protection if the block device is dynamically
* resized (partition grow/shrink) during a fault. A stable block device
* size is already not enforced in the blkdev_direct_IO path.
*
* For DAX, it is the responsibility of the block device driver to
* ensure the whole-disk device size is stable while requests are in
* flight.
*
* Finally, unlike the filemap_page_mkwrite() case there is no
* filesystem superblock to sync against freezing. We still include a
* pfn_mkwrite callback for dax drivers to receive write fault
* notifications.
*/
/*
 * ->fault handler for dax mappings of a raw block device: delegates to
 * __dax_fault() with blkdev_get_block as the block-mapping callback and
 * NULL as the final argument.
 */
static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
return __dax_fault(vma, vmf, blkdev_get_block, NULL);
}
/*
 * ->pfn_mkwrite handler for dax mappings of a raw block device: a plain
 * pass-through to dax_pfn_mkwrite().
 */
static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
return dax_pfn_mkwrite(vma, vmf);
}
/*
 * ->pmd_fault handler for dax mappings of a raw block device: delegates
 * to __dax_pmd_fault() with blkdev_get_block as the block-mapping callback
 * and NULL as the final argument.
 */
static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, unsigned int flags)
{
return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
}
/* vm_ops installed by blkdev_mmap() when the block device inode is dax. */
static const struct vm_operations_struct blkdev_dax_vm_ops = {
.fault = blkdev_dax_fault,
.pmd_fault = blkdev_dax_pmd_fault,
.pfn_mkwrite = blkdev_dax_pfn_mkwrite,
};
/* vm_ops installed by blkdev_mmap() otherwise: the generic filemap paths. */
static const struct vm_operations_struct blkdev_default_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
};
/*
 * mmap handler for block device files.  Marks the file accessed, then
 * picks the vm_ops table: the dax table (plus VM_MIXEDMAP | VM_HUGEPAGE on
 * the vma) when the backing inode is dax, the default filemap table
 * otherwise.  Always returns 0.
 */
static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = bdev_file_inode(file);

	file_accessed(file);

	if (!IS_DAX(inode)) {
		vma->vm_ops = &blkdev_default_vm_ops;
		return 0;
	}

	vma->vm_ops = &blkdev_dax_vm_ops;
	vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
	return 0;
}
#else
#define blkdev_mmap generic_file_mmap
#endif
const struct file_operations def_blk_fops = { const struct file_operations def_blk_fops = {
.open = blkdev_open, .open = blkdev_open,
.release = blkdev_close, .release = blkdev_close,
.llseek = block_llseek, .llseek = block_llseek,
.read_iter = blkdev_read_iter, .read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter, .write_iter = blkdev_write_iter,
.mmap = blkdev_mmap, .mmap = generic_file_mmap,
.fsync = blkdev_fsync, .fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl, .unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
......
...@@ -2320,14 +2320,6 @@ extern struct super_block *freeze_bdev(struct block_device *); ...@@ -2320,14 +2320,6 @@ extern struct super_block *freeze_bdev(struct block_device *);
extern void emergency_thaw_all(void); extern void emergency_thaw_all(void);
extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
extern int fsync_bdev(struct block_device *); extern int fsync_bdev(struct block_device *);
#ifdef CONFIG_FS_DAX
extern bool blkdev_dax_capable(struct block_device *bdev);
#else
static inline bool blkdev_dax_capable(struct block_device *bdev)
{
return false;
}
#endif
extern struct super_block *blockdev_superblock; extern struct super_block *blockdev_superblock;
......
...@@ -222,7 +222,6 @@ struct fsxattr { ...@@ -222,7 +222,6 @@ struct fsxattr {
#define BLKSECDISCARD _IO(0x12,125) #define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126) #define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127) #define BLKZEROOUT _IO(0x12,127)
#define BLKDAXGET _IO(0x12,129)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */ #define FIBMAP _IO(0x00,1) /* bmap access */
......
...@@ -1013,6 +1013,7 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, ...@@ -1013,6 +1013,7 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write); insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
return VM_FAULT_NOPAGE; return VM_FAULT_NOPAGE;
} }
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd) pmd_t *pmd)
......
...@@ -624,6 +624,7 @@ pgoff_t linear_hugepage_index(struct vm_area_struct *vma, ...@@ -624,6 +624,7 @@ pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
{ {
return vma_hugecache_offset(hstate_vma(vma), vma, address); return vma_hugecache_offset(hstate_vma(vma), vma, address);
} }
EXPORT_SYMBOL_GPL(linear_hugepage_index);
/* /*
* Return the size of the pages allocated when backing a VMA. In the majority * Return the size of the pages allocated when backing a VMA. In the majority
......
...@@ -16,6 +16,7 @@ ldflags-y += --wrap=phys_to_pfn_t ...@@ -16,6 +16,7 @@ ldflags-y += --wrap=phys_to_pfn_t
DRIVERS := ../../../drivers DRIVERS := ../../../drivers
NVDIMM_SRC := $(DRIVERS)/nvdimm NVDIMM_SRC := $(DRIVERS)/nvdimm
ACPI_SRC := $(DRIVERS)/acpi ACPI_SRC := $(DRIVERS)/acpi
DAX_SRC := $(DRIVERS)/dax
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
...@@ -23,6 +24,8 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o ...@@ -23,6 +24,8 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_ND_BLK) += nd_blk.o
obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
obj-$(CONFIG_ACPI_NFIT) += nfit.o obj-$(CONFIG_ACPI_NFIT) += nfit.o
obj-$(CONFIG_DEV_DAX) += dax.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
nfit-y := $(ACPI_SRC)/nfit.o nfit-y := $(ACPI_SRC)/nfit.o
nfit-y += config_check.o nfit-y += config_check.o
...@@ -39,6 +42,12 @@ nd_blk-y += config_check.o ...@@ -39,6 +42,12 @@ nd_blk-y += config_check.o
nd_e820-y := $(NVDIMM_SRC)/e820.o nd_e820-y := $(NVDIMM_SRC)/e820.o
nd_e820-y += config_check.o nd_e820-y += config_check.o
dax-y := $(DAX_SRC)/dax.o
dax-y += config_check.o
dax_pmem-y := $(DAX_SRC)/pmem.o
dax_pmem-y += config_check.o
libnvdimm-y := $(NVDIMM_SRC)/core.o libnvdimm-y := $(NVDIMM_SRC)/core.o
libnvdimm-y += $(NVDIMM_SRC)/bus.o libnvdimm-y += $(NVDIMM_SRC)/bus.o
libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
......
...@@ -12,4 +12,6 @@ void check(void) ...@@ -12,4 +12,6 @@ void check(void)
BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM));
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment