Commit c2356983 authored by Linus Torvalds

Merge tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl

Pull cxl updates from Dan Williams:
 "Compute Express Link (CXL) updates for 6.0:

   - Introduce a 'struct cxl_region' object with support for
     provisioning and assembling persistent memory regions.

   - Introduce alloc_free_mem_region() to accompany the existing
     request_free_mem_region() as a method to allocate physical memory
     capacity out of an existing resource.

   - Export insert_resource_expand_to_fit() for the CXL subsystem to
     late-publish CXL platform windows in iomem_resource.

   - Add a polled mode PCI DOE (Data Object Exchange) driver service and
     use it in cxl_pci to retrieve the CDAT (Coherent Device Attribute
     Table)"

* tag 'cxl-for-6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (74 commits)
  cxl/hdm: Fix skip allocations vs multiple pmem allocations
  cxl/region: Disallow region granularity != window granularity
  cxl/region: Fix x1 interleave to greater than x1 interleave routing
  cxl/region: Move HPA setup to cxl_region_attach()
  cxl/region: Fix decoder interleave programming
  Documentation: cxl: remove dangling kernel-doc reference
  cxl/region: describe targets and nr_targets members of cxl_region_params
  cxl/regions: add padding for cxl_rr_ep_add nested lists
  cxl/region: Fix IS_ERR() vs NULL check
  cxl/region: Fix region reference target accounting
  cxl/region: Fix region commit uninitialized variable warning
  cxl/region: Fix port setup uninitialized variable warnings
  cxl/region: Stop initializing interleave granularity
  cxl/hdm: Fix DPA reservation vs cxl_endpoint_decoder lifetime
  cxl/acpi: Minimize granularity for x1 interleaves
  cxl/region: Delete 'region' attribute from root decoders
  cxl/acpi: Autoload driver for 'cxl_acpi' test devices
  cxl/region: decrement ->nr_targets on error in cxl_region_attach()
  cxl/region: prevent underflow in ways_to_cxl()
  cxl/region: uninitialized variable in alloc_hpa()
  ...
parents 5e2e7383 1cd8a253
@@ -516,6 +516,7 @@ ForEachMacros:
  - 'of_property_for_each_string'
  - 'of_property_for_each_u32'
  - 'pci_bus_for_each_resource'
  - 'pci_doe_for_each_off'
  - 'pcl_for_each_chunk'
  - 'pcl_for_each_segment'
  - 'pcm_for_each_format'
......
This diff is collapsed.
@@ -362,6 +362,14 @@ CXL Core
.. kernel-doc:: drivers/cxl/core/mbox.c
   :doc: cxl mbox

CXL Regions
-----------
.. kernel-doc:: drivers/cxl/core/region.c
   :doc: cxl core region

.. kernel-doc:: drivers/cxl/core/region.c
   :identifiers:

External Interfaces
===================
......
@@ -55,6 +55,7 @@ int memory_add_physaddr_to_nid(u64 start)
{
	return hot_add_scn_to_nid(start);
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

int __weak create_section_mapping(unsigned long start, unsigned long end,
......
@@ -2,6 +2,7 @@
menuconfig CXL_BUS
	tristate "CXL (Compute Express Link) Devices Support"
	depends on PCI
	select PCI_DOE
	help
	  CXL is a bus that is electrically compatible with PCI Express, but
	  layers three protocols on that signalling (CXL.io, CXL.cache, and
@@ -102,4 +103,12 @@ config CXL_SUSPEND
	def_bool y
	depends on SUSPEND && CXL_MEM

config CXL_REGION
	bool
	default CXL_BUS
	# For MAX_PHYSMEM_BITS
	depends on SPARSEMEM
	select MEMREGION
	select GET_FREE_REGION

endif
This diff is collapsed.
@@ -10,3 +10,4 @@ cxl_core-y += memdev.o
cxl_core-y += mbox.o
cxl_core-y += pci.o
cxl_core-y += hdm.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
@@ -9,6 +9,36 @@ extern const struct device_type cxl_nvdimm_type;
extern struct attribute_group cxl_base_attribute_group;
#ifdef CONFIG_CXL_REGION
extern struct device_attribute dev_attr_create_pmem_region;
extern struct device_attribute dev_attr_delete_region;
extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
extern const struct device_type cxl_region_type;
void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
#define CXL_REGION_TYPE(x) (&cxl_region_type)
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
#define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
int cxl_region_init(void);
void cxl_region_exit(void);
#else
static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
{
}
static inline int cxl_region_init(void)
{
return 0;
}
static inline void cxl_region_exit(void)
{
}
#define CXL_REGION_ATTR(x) NULL
#define CXL_REGION_TYPE(x) NULL
#define SET_CXL_REGION_ATTR(x)
#define CXL_PMEM_REGION_TYPE(x) NULL
#endif
struct cxl_send_command;
struct cxl_mem_query_commands;
int cxl_query_cmd(struct cxl_memdev *cxlmd,
@@ -17,9 +47,28 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
				   resource_size_t length);
struct dentry *cxl_debugfs_create_dir(const char *dir);
int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
enum cxl_decoder_mode mode);
int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size);
int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled);
extern struct rw_semaphore cxl_dpa_rwsem;
bool is_switch_decoder(struct device *dev);
struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
{
if (!port)
return NULL;
return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
}
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
void cxl_mbox_init(void);
void cxl_mbox_exit(void);
#endif /* __CXL_CORE_H__ */
This diff is collapsed.
@@ -718,12 +718,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
 */
static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
{
	struct cxl_mbox_get_partition_info {
		__le64 active_volatile_cap;
		__le64 active_persistent_cap;
		__le64 next_volatile_cap;
		__le64 next_persistent_cap;
	} __packed pi;
	struct cxl_mbox_get_partition_info pi;
	int rc;

	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_PARTITION_INFO, NULL, 0,
@@ -773,15 +768,6 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
	cxlds->partition_align_bytes =
		le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER;

	dev_dbg(cxlds->dev,
		"Identify Memory Device\n"
		" total_bytes = %#llx\n"
		" volatile_only_bytes = %#llx\n"
		" persistent_only_bytes = %#llx\n"
		" partition_align_bytes = %#llx\n",
		cxlds->total_bytes, cxlds->volatile_only_bytes,
		cxlds->persistent_only_bytes, cxlds->partition_align_bytes);

	cxlds->lsa_size = le32_to_cpu(id.lsa_size);
	memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
@@ -789,42 +775,63 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
int cxl_mem_create_range_info(struct cxl_dev_state *cxlds) static int add_dpa_res(struct device *dev, struct resource *parent,
struct resource *res, resource_size_t start,
resource_size_t size, const char *type)
{ {
int rc; int rc;
if (cxlds->partition_align_bytes == 0) { res->name = type;
cxlds->ram_range.start = 0; res->start = start;
cxlds->ram_range.end = cxlds->volatile_only_bytes - 1; res->end = start + size - 1;
cxlds->pmem_range.start = cxlds->volatile_only_bytes; res->flags = IORESOURCE_MEM;
cxlds->pmem_range.end = cxlds->volatile_only_bytes + if (resource_size(res) == 0) {
cxlds->persistent_only_bytes - 1; dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
return 0; return 0;
} }
rc = request_resource(parent, res);
rc = cxl_mem_get_partition_info(cxlds);
if (rc) { if (rc) {
dev_err(cxlds->dev, "Failed to query partition information\n"); dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
res, rc);
return rc; return rc;
} }
dev_dbg(cxlds->dev, dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
"Get Partition Info\n"
" active_volatile_bytes = %#llx\n" return 0;
" active_persistent_bytes = %#llx\n" }
" next_volatile_bytes = %#llx\n"
" next_persistent_bytes = %#llx\n",
cxlds->active_volatile_bytes, cxlds->active_persistent_bytes,
cxlds->next_volatile_bytes, cxlds->next_persistent_bytes);
cxlds->ram_range.start = 0; int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
cxlds->ram_range.end = cxlds->active_volatile_bytes - 1; {
struct device *dev = cxlds->dev;
int rc;
cxlds->pmem_range.start = cxlds->active_volatile_bytes; cxlds->dpa_res =
cxlds->pmem_range.end = (struct resource)DEFINE_RES_MEM(0, cxlds->total_bytes);
cxlds->active_volatile_bytes + cxlds->active_persistent_bytes - 1;
return 0; if (cxlds->partition_align_bytes == 0) {
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
cxlds->volatile_only_bytes, "ram");
if (rc)
return rc;
return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
cxlds->volatile_only_bytes,
cxlds->persistent_only_bytes, "pmem");
}
rc = cxl_mem_get_partition_info(cxlds);
if (rc) {
dev_err(dev, "Failed to query partition information\n");
return rc;
}
rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
cxlds->active_volatile_bytes, "ram");
if (rc)
return rc;
return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
cxlds->active_volatile_bytes,
cxlds->active_persistent_bytes, "pmem");
} }
EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL); EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
@@ -845,19 +852,11 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_create, CXL);

static struct dentry *cxl_debugfs;

void __init cxl_mbox_init(void)
{
	struct dentry *mbox_debugfs;

	cxl_debugfs = debugfs_create_dir("cxl", NULL);
	mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
	mbox_debugfs = cxl_debugfs_create_dir("mbox");
	debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
			    &cxl_raw_allow_all);
}

void cxl_mbox_exit(void)
{
	debugfs_remove_recursive(cxl_debugfs);
}
@@ -68,7 +68,7 @@ static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = range_len(&cxlds->ram_range);
	unsigned long long len = resource_size(&cxlds->ram_res);

	return sysfs_emit(buf, "%#llx\n", len);
}
@@ -81,7 +81,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = range_len(&cxlds->pmem_range);
	unsigned long long len = resource_size(&cxlds->pmem_res);

	return sysfs_emit(buf, "%#llx\n", len);
}
......
@@ -4,6 +4,7 @@
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
@@ -225,7 +226,6 @@ static int dvsec_range_allowed(struct device *dev, void *arg)
{
	struct range *dev_range = arg;
	struct cxl_decoder *cxld;
	struct range root_range;

	if (!is_root_decoder(dev))
		return 0;
@@ -237,12 +237,7 @@ static int dvsec_range_allowed(struct device *dev, void *arg)
	if (!(cxld->flags & CXL_DECODER_F_RAM))
		return 0;

	root_range = (struct range) {
		.start = cxld->platform_res.start,
		.end = cxld->platform_res.end,
	};

	return range_contains(&root_range, dev_range);
	return range_contains(&cxld->hpa_range, dev_range);
}

static void disable_hdm(void *_cxlhdm)
@@ -458,3 +453,175 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
#define CXL_DOE_TABLE_ACCESS_REQ_CODE 0x000000ff
#define CXL_DOE_TABLE_ACCESS_REQ_CODE_READ 0
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE 0x0000ff00
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA 0
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE 0xffff0000
#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY 0xffff
#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2
static struct pci_doe_mb *find_cdat_doe(struct device *uport)
{
struct cxl_memdev *cxlmd;
struct cxl_dev_state *cxlds;
unsigned long index;
void *entry;
cxlmd = to_cxl_memdev(uport);
cxlds = cxlmd->cxlds;
xa_for_each(&cxlds->doe_mbs, index, entry) {
struct pci_doe_mb *cur = entry;
if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL,
CXL_DOE_PROTOCOL_TABLE_ACCESS))
return cur;
}
return NULL;
}
#define CDAT_DOE_REQ(entry_handle) \
(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \
CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \
FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \
CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) | \
FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
static void cxl_doe_task_complete(struct pci_doe_task *task)
{
complete(task->private);
}
struct cdat_doe_task {
u32 request_pl;
u32 response_pl[32];
struct completion c;
struct pci_doe_task task;
};
#define DECLARE_CDAT_DOE_TASK(req, cdt) \
struct cdat_doe_task cdt = { \
.c = COMPLETION_INITIALIZER_ONSTACK(cdt.c), \
.request_pl = req, \
.task = { \
.prot.vid = PCI_DVSEC_VENDOR_ID_CXL, \
.prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, \
.request_pl = &cdt.request_pl, \
.request_pl_sz = sizeof(cdt.request_pl), \
.response_pl = cdt.response_pl, \
.response_pl_sz = sizeof(cdt.response_pl), \
.complete = cxl_doe_task_complete, \
.private = &cdt.c, \
} \
}
static int cxl_cdat_get_length(struct device *dev,
struct pci_doe_mb *cdat_doe,
size_t *length)
{
DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
int rc;
rc = pci_doe_submit_task(cdat_doe, &t.task);
if (rc < 0) {
dev_err(dev, "DOE submit failed: %d", rc);
return rc;
}
wait_for_completion(&t.c);
if (t.task.rv < sizeof(u32))
return -EIO;
*length = t.response_pl[1];
dev_dbg(dev, "CDAT length %zu\n", *length);
return 0;
}
static int cxl_cdat_read_table(struct device *dev,
struct pci_doe_mb *cdat_doe,
struct cxl_cdat *cdat)
{
size_t length = cdat->length;
u32 *data = cdat->table;
int entry_handle = 0;
do {
DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
size_t entry_dw;
u32 *entry;
int rc;
rc = pci_doe_submit_task(cdat_doe, &t.task);
if (rc < 0) {
dev_err(dev, "DOE submit failed: %d", rc);
return rc;
}
wait_for_completion(&t.c);
/* 1 DW header + 1 DW data min */
if (t.task.rv < (2 * sizeof(u32)))
return -EIO;
/* Get the CXL table access header entry handle */
entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
t.response_pl[0]);
entry = t.response_pl + 1;
entry_dw = t.task.rv / sizeof(u32);
/* Skip Header */
entry_dw -= 1;
entry_dw = min(length / sizeof(u32), entry_dw);
/* Prevent length < 1 DW from causing a buffer overflow */
if (entry_dw) {
memcpy(data, entry, entry_dw * sizeof(u32));
length -= entry_dw * sizeof(u32);
data += entry_dw;
}
} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
return 0;
}
/**
* read_cdat_data - Read the CDAT data on this port
* @port: Port to read data from
*
* This call will sleep waiting for responses from the DOE mailbox.
*/
void read_cdat_data(struct cxl_port *port)
{
struct pci_doe_mb *cdat_doe;
struct device *dev = &port->dev;
struct device *uport = port->uport;
size_t cdat_length;
int rc;
cdat_doe = find_cdat_doe(uport);
if (!cdat_doe) {
dev_dbg(dev, "No CDAT mailbox\n");
return;
}
port->cdat_available = true;
if (cxl_cdat_get_length(dev, cdat_doe, &cdat_length)) {
dev_dbg(dev, "No CDAT length\n");
return;
}
port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
if (!port->cdat.table)
return;
port->cdat.length = cdat_length;
rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat);
if (rc) {
/* Don't leave table data allocated on error */
devm_kfree(dev, port->cdat.table);
port->cdat.table = NULL;
port->cdat.length = 0;
dev_err(dev, "CDAT data read error\n");
}
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
@@ -62,9 +62,9 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
	return is_cxl_nvdimm_bridge(dev);
}

struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start)
{
	struct cxl_port *port = find_cxl_root(&cxl_nvd->dev);
	struct cxl_port *port = find_cxl_root(start);
	struct device *dev;

	if (!port)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
@@ -50,6 +50,24 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
	return container_of(dev, struct cxl_memdev, dev);
}
static inline struct cxl_port *cxled_to_port(struct cxl_endpoint_decoder *cxled)
{
return to_cxl_port(cxled->cxld.dev.parent);
}
static inline struct cxl_port *cxlrd_to_port(struct cxl_root_decoder *cxlrd)
{
return to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
}
static inline struct cxl_memdev *
cxled_to_memdev(struct cxl_endpoint_decoder *cxled)
{
struct cxl_port *port = to_cxl_port(cxled->cxld.dev.parent);
return to_cxl_memdev(port->uport);
}
bool is_cxl_memdev(struct device *dev);
static inline bool is_cxl_endpoint(struct cxl_port *port)
{
@@ -178,8 +196,9 @@ struct cxl_endpoint_dvsec_info {
 * @firmware_version: Firmware version for the memory device.
 * @enabled_cmds: Hardware commands found enabled in CEL.
 * @exclusive_cmds: Commands that are kernel-internal only
 * @pmem_range: Active Persistent memory capacity configuration
 * @ram_range: Active Volatile memory capacity configuration
 * @dpa_res: Overall DPA resource tree for the device
 * @pmem_res: Active Persistent memory capacity configuration
 * @ram_res: Active Volatile memory capacity configuration
 * @total_bytes: sum of all possible capacities
 * @volatile_only_bytes: hard volatile capacity
 * @persistent_only_bytes: hard persistent capacity
@@ -191,6 +210,7 @@ struct cxl_endpoint_dvsec_info {
 * @component_reg_phys: register base of component registers
 * @info: Cached DVSEC information about the device.
 * @serial: PCIe Device Serial Number
 * @doe_mbs: PCI DOE mailbox array
 * @mbox_send: @dev specific transport for transmitting mailbox commands
 *
 * See section 8.2.9.5.2 Capacity Configuration and Label Storage for
@@ -209,8 +229,9 @@ struct cxl_dev_state {
	DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
	DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);

	struct range pmem_range;
	struct range ram_range;
	struct resource dpa_res;
	struct resource pmem_res;
	struct resource ram_res;
	u64 total_bytes;
	u64 volatile_only_bytes;
	u64 persistent_only_bytes;
@@ -224,6 +245,8 @@ struct cxl_dev_state {
	resource_size_t component_reg_phys;
	u64 serial;

	struct xarray doe_mbs;

	int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
};
@@ -299,6 +322,13 @@ struct cxl_mbox_identify {
	u8 qos_telemetry_caps;
} __packed;
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
__le64 next_volatile_cap;
__le64 next_persistent_cap;
} __packed;
struct cxl_mbox_get_lsa {
	__le32 offset;
	__le32 length;
@@ -370,4 +400,8 @@ struct cxl_hdm {
	unsigned int interleave_mask;
	struct cxl_port *port;
};
struct seq_file;
struct dentry *cxl_debugfs_create_dir(const char *dir);
void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds);
#endif /* __CXL_MEM_H__ */
@@ -74,4 +74,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev,
int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
void read_cdat_data(struct cxl_port *port);
#endif /* __CXL_PCI_H__ */
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -24,42 +25,32 @@
 * in higher level operations.
 */
static int create_endpoint(struct cxl_memdev *cxlmd, static void enable_suspend(void *data)
struct cxl_port *parent_port)
{ {
struct cxl_dev_state *cxlds = cxlmd->cxlds; cxl_mem_active_dec();
struct cxl_port *endpoint; }
int rc;
endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev, static void remove_debugfs(void *dentry)
cxlds->component_reg_phys, parent_port); {
if (IS_ERR(endpoint)) debugfs_remove_recursive(dentry);
return PTR_ERR(endpoint); }
dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev)); static int cxl_mem_dpa_show(struct seq_file *file, void *data)
{
struct device *dev = file->private;
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
rc = cxl_endpoint_autoremove(cxlmd, endpoint); cxl_dpa_debug(file, cxlmd->cxlds);
if (rc)
return rc;
if (!endpoint->dev.driver) {
dev_err(&cxlmd->dev, "%s failed probe\n",
dev_name(&endpoint->dev));
return -ENXIO;
}
return 0; return 0;
} }
static void enable_suspend(void *data)
{
cxl_mem_active_dec();
}
static int cxl_mem_probe(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_port *parent_port;
	struct cxl_dport *dport;
	struct dentry *dentry;
	int rc;

	/*
@@ -73,11 +64,17 @@ static int cxl_mem_probe(struct device *dev)
	if (work_pending(&cxlmd->detach_work))
		return -EBUSY;
dentry = cxl_debugfs_create_dir(dev_name(dev));
debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
if (rc)
return rc;
	rc = devm_cxl_enumerate_ports(cxlmd);
	if (rc)
		return rc;

	parent_port = cxl_mem_find_port(cxlmd);
	parent_port = cxl_mem_find_port(cxlmd, &dport);
	if (!parent_port) {
		dev_err(dev, "CXL port topology not found\n");
		return -ENXIO;
@@ -91,7 +88,7 @@ static int cxl_mem_probe(struct device *dev)
		goto unlock;
	}

	rc = create_endpoint(cxlmd, parent_port);
	rc = devm_cxl_add_endpoint(cxlmd, dport);
unlock:
	device_unlock(&parent_port->dev);
	put_device(&parent_port->dev);
......
@@ -8,6 +8,7 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "cxlpci.h"
@@ -386,6 +387,47 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
	return rc;
}
static void cxl_pci_destroy_doe(void *mbs)
{
xa_destroy(mbs);
}
static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
{
struct device *dev = cxlds->dev;
struct pci_dev *pdev = to_pci_dev(dev);
u16 off = 0;
xa_init(&cxlds->doe_mbs);
if (devm_add_action(&pdev->dev, cxl_pci_destroy_doe, &cxlds->doe_mbs)) {
dev_err(dev, "Failed to create XArray for DOE's\n");
return;
}
/*
* Mailbox creation is best effort. Higher layers must determine if
* the lack of a mailbox for their protocol is a device failure or not.
*/
pci_doe_for_each_off(pdev, off) {
struct pci_doe_mb *doe_mb;
doe_mb = pcim_doe_create_mb(pdev, off);
if (IS_ERR(doe_mb)) {
dev_err(dev, "Failed to create MB object for MB @ %x\n",
off);
continue;
}
if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
dev_err(dev, "xa_insert failed to insert MB @ %x\n",
off);
continue;
}
dev_dbg(dev, "Created DOE mailbox @%x\n", off);
}
}
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct cxl_register_map map;
@@ -434,6 +476,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);

	devm_cxl_pci_create_doe(cxlds);

	rc = cxl_pci_setup_mailbox(cxlds);
	if (rc)
		return rc;
@@ -454,7 +498,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
	if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);

	return rc;
......
@@ -7,6 +7,7 @@
#include <linux/ndctl.h>
#include <linux/async.h>
#include <linux/slab.h>
#include <linux/nd.h>
#include "cxlmem.h"
#include "cxl.h"
@@ -26,7 +27,23 @@ static void clear_exclusive(void *cxlds)
static void unregister_nvdimm(void *nvdimm)
{
struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
struct cxl_pmem_region *cxlr_pmem;
device_lock(&cxl_nvb->dev);
cxlr_pmem = cxl_nvd->region;
dev_set_drvdata(&cxl_nvd->dev, NULL);
cxl_nvd->region = NULL;
device_unlock(&cxl_nvb->dev);
if (cxlr_pmem) {
device_release_driver(&cxlr_pmem->dev);
put_device(&cxlr_pmem->dev);
}
	nvdimm_delete(nvdimm);
	cxl_nvd->bridge = NULL;
}

static int cxl_nvdimm_probe(struct device *dev)
@@ -39,7 +56,7 @@ static int cxl_nvdimm_probe(struct device *dev)
	struct nvdimm *nvdimm;
	int rc;

	cxl_nvb = cxl_find_nvdimm_bridge(cxl_nvd);
	cxl_nvb = cxl_find_nvdimm_bridge(dev);
	if (!cxl_nvb)
		return -ENXIO;
@@ -66,6 +83,7 @@ static int cxl_nvdimm_probe(struct device *dev)
	}
	dev_set_drvdata(dev, nvdimm);
	cxl_nvd->bridge = cxl_nvb;
	rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
out:
	device_unlock(&cxl_nvb->dev);
@@ -204,15 +222,38 @@ static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
	return cxl_nvb->nvdimm_bus != NULL;
}

static int cxl_nvdimm_release_driver(struct device *dev, void *data)
static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
{
	struct cxl_nvdimm *cxl_nvd;

	if (!is_cxl_nvdimm(dev))
		return 0;

	cxl_nvd = to_cxl_nvdimm(dev);
	if (cxl_nvd->bridge != cxl_nvb)
		return 0;

	device_release_driver(dev);
	return 0;
}
static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus) static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb)
{
struct cxl_pmem_region *cxlr_pmem;
if (!is_cxl_pmem_region(dev))
return 0;
cxlr_pmem = to_cxl_pmem_region(dev);
if (cxlr_pmem->bridge != cxl_nvb)
return 0;
device_release_driver(dev);
return 0;
}
static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
struct nvdimm_bus *nvdimm_bus)
{ {
if (!nvdimm_bus) if (!nvdimm_bus)
return; return;
...@@ -222,7 +263,10 @@ static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus) ...@@ -222,7 +263,10 @@ static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus)
* nvdimm_bus_unregister() rips the nvdimm objects out from * nvdimm_bus_unregister() rips the nvdimm objects out from
* underneath them. * underneath them.
*/ */
bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_release_driver); bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
cxl_pmem_region_release_driver);
bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
cxl_nvdimm_release_driver);
nvdimm_bus_unregister(nvdimm_bus); nvdimm_bus_unregister(nvdimm_bus);
} }
...@@ -260,7 +304,7 @@ static void cxl_nvb_update_state(struct work_struct *work) ...@@ -260,7 +304,7 @@ static void cxl_nvb_update_state(struct work_struct *work)
dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc); dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
} }
offline_nvdimm_bus(victim_bus); offline_nvdimm_bus(cxl_nvb, victim_bus);
put_device(&cxl_nvb->dev); put_device(&cxl_nvb->dev);
} }
@@ -315,6 +359,203 @@ static struct cxl_driver cxl_nvdimm_bridge_driver = {
	.id = CXL_DEVICE_NVDIMM_BRIDGE,
};
static int match_cxl_nvdimm(struct device *dev, void *data)
{
return is_cxl_nvdimm(dev);
}
static void unregister_nvdimm_region(void *nd_region)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_pmem_region *cxlr_pmem;
int i;
cxlr_pmem = nd_region_provider_data(nd_region);
cxl_nvb = cxlr_pmem->bridge;
device_lock(&cxl_nvb->dev);
for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;
if (cxl_nvd->region) {
put_device(&cxlr_pmem->dev);
cxl_nvd->region = NULL;
}
}
device_unlock(&cxl_nvb->dev);
nvdimm_region_delete(nd_region);
}
static void cxlr_pmem_remove_resource(void *res)
{
remove_resource(res);
}
struct cxl_pmem_region_info {
u64 offset;
u64 serial;
};
static int cxl_pmem_region_probe(struct device *dev)
{
struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
struct cxl_region *cxlr = cxlr_pmem->cxlr;
struct cxl_pmem_region_info *info = NULL;
struct cxl_nvdimm_bridge *cxl_nvb;
struct nd_interleave_set *nd_set;
struct nd_region_desc ndr_desc;
struct cxl_nvdimm *cxl_nvd;
struct nvdimm *nvdimm;
struct resource *res;
int rc, i = 0;
cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
if (!cxl_nvb) {
dev_dbg(dev, "bridge not found\n");
return -ENXIO;
}
cxlr_pmem->bridge = cxl_nvb;
device_lock(&cxl_nvb->dev);
if (!cxl_nvb->nvdimm_bus) {
dev_dbg(dev, "nvdimm bus not found\n");
rc = -ENXIO;
goto err;
}
memset(&mappings, 0, sizeof(mappings));
memset(&ndr_desc, 0, sizeof(ndr_desc));
res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
if (!res) {
rc = -ENOMEM;
goto err;
}
res->name = "Persistent Memory";
res->start = cxlr_pmem->hpa_range.start;
res->end = cxlr_pmem->hpa_range.end;
res->flags = IORESOURCE_MEM;
res->desc = IORES_DESC_PERSISTENT_MEMORY;
rc = insert_resource(&iomem_resource, res);
if (rc)
goto err;
rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
if (rc)
goto err;
ndr_desc.res = res;
ndr_desc.provider_data = cxlr_pmem;
ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start);
ndr_desc.target_node = phys_to_target_node(res->start);
if (ndr_desc.target_node == NUMA_NO_NODE) {
ndr_desc.target_node = ndr_desc.numa_node;
dev_dbg(&cxlr->dev, "changing target node from %d to %d",
NUMA_NO_NODE, ndr_desc.target_node);
}
nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
if (!nd_set) {
rc = -ENOMEM;
goto err;
}
ndr_desc.memregion = cxlr->id;
set_bit(ND_REGION_CXL, &ndr_desc.flags);
set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
if (!info) {
rc = -ENOMEM;
goto err;
}
for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
struct cxl_memdev *cxlmd = m->cxlmd;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *d;
d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
if (!d) {
dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
dev_name(&cxlmd->dev));
rc = -ENODEV;
goto err;
}
/* safe to drop ref now with bridge lock held */
put_device(d);
cxl_nvd = to_cxl_nvdimm(d);
nvdimm = dev_get_drvdata(&cxl_nvd->dev);
if (!nvdimm) {
dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
dev_name(&cxlmd->dev));
rc = -ENODEV;
goto err;
}
cxl_nvd->region = cxlr_pmem;
get_device(&cxlr_pmem->dev);
m->cxl_nvd = cxl_nvd;
mappings[i] = (struct nd_mapping_desc) {
.nvdimm = nvdimm,
.start = m->start,
.size = m->size,
.position = i,
};
info[i].offset = m->start;
info[i].serial = cxlds->serial;
}
ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
ndr_desc.mapping = mappings;
/*
* TODO enable CXL labels which skip the need for 'interleave-set cookie'
*/
nd_set->cookie1 =
nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0);
nd_set->cookie2 = nd_set->cookie1;
ndr_desc.nd_set = nd_set;
cxlr_pmem->nd_region =
nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
if (!cxlr_pmem->nd_region) {
rc = -ENOMEM;
goto err;
}
rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
cxlr_pmem->nd_region);
out:
kfree(info);
device_unlock(&cxl_nvb->dev);
put_device(&cxl_nvb->dev);
return rc;
err:
dev_dbg(dev, "failed to create nvdimm region\n");
for (i--; i >= 0; i--) {
nvdimm = mappings[i].nvdimm;
cxl_nvd = nvdimm_provider_data(nvdimm);
put_device(&cxl_nvd->region->dev);
cxl_nvd->region = NULL;
}
goto out;
}
static struct cxl_driver cxl_pmem_region_driver = {
.name = "cxl_pmem_region",
.probe = cxl_pmem_region_probe,
.id = CXL_DEVICE_PMEM_REGION,
};
/*
 * Return all bridges to the CXL_NVB_NEW state to invalidate any
 * ->state_work referring to the now destroyed cxl_pmem_wq.
@@ -359,8 +600,14 @@ static __init int cxl_pmem_init(void)
	if (rc)
		goto err_nvdimm;

	rc = cxl_driver_register(&cxl_pmem_region_driver);
	if (rc)
		goto err_region;

	return 0;

err_region:
	cxl_driver_unregister(&cxl_nvdimm_driver);
err_nvdimm:
	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
err_bridge:
@@ -370,6 +617,7 @@ static __init int cxl_pmem_init(void)
static __exit void cxl_pmem_exit(void)
{
	cxl_driver_unregister(&cxl_pmem_region_driver);
	cxl_driver_unregister(&cxl_nvdimm_driver);
	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
	destroy_cxl_pmem_wq();
@@ -381,3 +629,4 @@ module_exit(cxl_pmem_exit);
MODULE_IMPORT_NS(CXL);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM);
MODULE_ALIAS_CXL(CXL_DEVICE_PMEM_REGION);
@@ -53,6 +53,9 @@ static int cxl_port_probe(struct device *dev)
		struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
		struct cxl_dev_state *cxlds = cxlmd->cxlds;

		/* Cache the data early to ensure is_visible() works */
		read_cdat_data(port);

		get_device(&cxlmd->dev);
		rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd);
		if (rc)
@@ -78,10 +81,60 @@ static int cxl_port_probe(struct device *dev)
	return 0;
}
static ssize_t CDAT_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_port *port = to_cxl_port(dev);
if (!port->cdat_available)
return -ENXIO;
if (!port->cdat.table)
return 0;
return memory_read_from_buffer(buf, count, &offset,
port->cdat.table,
port->cdat.length);
}
static BIN_ATTR_ADMIN_RO(CDAT, 0);
static umode_t cxl_port_bin_attr_is_visible(struct kobject *kobj,
struct bin_attribute *attr, int i)
{
struct device *dev = kobj_to_dev(kobj);
struct cxl_port *port = to_cxl_port(dev);
if ((attr == &bin_attr_CDAT) && port->cdat_available)
return attr->attr.mode;
return 0;
}
static struct bin_attribute *cxl_cdat_bin_attributes[] = {
&bin_attr_CDAT,
NULL,
};
static struct attribute_group cxl_cdat_attribute_group = {
.bin_attrs = cxl_cdat_bin_attributes,
.is_bin_visible = cxl_port_bin_attr_is_visible,
};
static const struct attribute_group *cxl_port_attribute_groups[] = {
&cxl_cdat_attribute_group,
NULL,
};
static struct cxl_driver cxl_port_driver = {
	.name = "cxl_port",
	.probe = cxl_port_probe,
	.id = CXL_DEVICE_PORT,
	.drv = {
		.dev_groups = cxl_port_attribute_groups,
	},
};

module_cxl_driver(cxl_port_driver);
......
@@ -133,6 +133,7 @@ static void nd_region_release(struct device *dev)
		put_device(&nvdimm->dev);
	}
	free_percpu(nd_region->lane);
	if (!test_bit(ND_REGION_CXL, &nd_region->flags))
		memregion_free(nd_region->id);
	kfree(nd_region);
}
@@ -982,9 +983,14 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
	if (!nd_region)
		return NULL;

	/* CXL pre-assigns memregion ids before creating nvdimm regions */
	if (test_bit(ND_REGION_CXL, &ndr_desc->flags)) {
		nd_region->id = ndr_desc->memregion;
	} else {
		nd_region->id = memregion_alloc(GFP_KERNEL);
		if (nd_region->id < 0)
			goto err_id;
	}

	nd_region->lane = alloc_percpu(struct nd_percpu_lane);
	if (!nd_region->lane)
@@ -1043,9 +1049,10 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
	return nd_region;

 err_percpu:
	if (!test_bit(ND_REGION_CXL, &ndr_desc->flags))
		memregion_free(nd_region->id);
 err_id:
	kfree(nd_region);
	return NULL;
}
@@ -1068,6 +1075,13 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);

void nvdimm_region_delete(struct nd_region *nd_region)
{
	if (nd_region)
		nd_device_unregister(&nd_region->dev, ND_SYNC);
}
EXPORT_SYMBOL_GPL(nvdimm_region_delete);

int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
{
	int rc = 0;
......
@@ -121,6 +121,9 @@ config XEN_PCIDEV_FRONTEND
config PCI_ATS
	bool

config PCI_DOE
	bool

config PCI_ECAM
	bool
......
@@ -31,6 +31,7 @@ obj-$(CONFIG_PCI_ECAM) += ecam.o
obj-$(CONFIG_PCI_P2PDMA) += p2pdma.o
obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
obj-$(CONFIG_VGA_ARB) += vgaarb.o
obj-$(CONFIG_PCI_DOE) += doe.o

# Endpoint library must be initialized before its users
obj-$(CONFIG_PCI_ENDPOINT) += endpoint/
......
This diff is collapsed.
@@ -2315,7 +2315,7 @@ EXPORT_SYMBOL(pci_alloc_dev);
static bool pci_bus_crs_vendor_id(u32 l)
{
	return (l & 0xffff) == 0x0001;
	return (l & 0xffff) == PCI_VENDOR_ID_PCI_SIG;
}

static bool pci_bus_wait_crs(struct pci_bus *bus, int devfn, u32 *l,
......
@@ -141,6 +141,7 @@ enum {
	IORES_DESC_DEVICE_PRIVATE_MEMORY = 6,
	IORES_DESC_RESERVED = 7,
	IORES_DESC_SOFT_RESERVED = 8,
	IORES_DESC_CXL = 9,
};

/*
@@ -329,6 +330,8 @@ struct resource *devm_request_free_mem_region(struct device *dev,
		struct resource *base, unsigned long size);
struct resource *request_free_mem_region(struct resource *base,
		unsigned long size, const char *name);
struct resource *alloc_free_mem_region(struct resource *base,
		unsigned long size, unsigned long align, const char *name);

static inline void irqresource_disabled(struct resource *res, u32 irq)
{
......
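As a usage illustration of the allocator declared above (a minimal sketch, not part of this series; the cxl_window resource, the helper name and the 256MB size are hypothetical), a caller hands alloc_free_mem_region() a parent resource and gets back a newly inserted, named child resource, or an ERR_PTR on failure:

#include <linux/err.h>
#include <linux/ioport.h>
#include <linux/sizes.h>

/* Hypothetical: carve a 256MB, 256MB-aligned span out of a CXL window */
static int example_alloc_hpa(struct resource *cxl_window)
{
	struct resource *res;

	res = alloc_free_mem_region(cxl_window, SZ_256M, SZ_256M, "region0");
	if (IS_ERR(res))
		return PTR_ERR(res);

	/* ... program decoders to cover res->start .. res->end ... */
	return 0;
}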
@@ -59,6 +59,9 @@ enum {
	/* Platform provides asynchronous flush mechanism */
	ND_REGION_ASYNC = 3,

	/* Region was created by CXL subsystem */
	ND_REGION_CXL = 4,

	/* mark newly adjusted resources as requiring a label update */
	DPA_RESOURCE_ADJUSTED = 1 << 0,
};
@@ -122,6 +125,7 @@ struct nd_region_desc {
	int numa_node;
	int target_node;
	unsigned long flags;
	int memregion;
	struct device_node *of_node;
	int (*flush)(struct nd_region *nd_region, struct bio *bio);
};
@@ -259,6 +263,7 @@ static inline struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus,
			cmd_mask, num_flush, flush_wpq, NULL, NULL, NULL);
}
void nvdimm_delete(struct nvdimm *nvdimm);
void nvdimm_region_delete(struct nd_region *nd_region);
const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Data Object Exchange
* PCIe r6.0, sec 6.30 DOE
*
* Copyright (C) 2021 Huawei
* Jonathan Cameron <Jonathan.Cameron@huawei.com>
*
* Copyright (C) 2022 Intel Corporation
* Ira Weiny <ira.weiny@intel.com>
*/
#ifndef LINUX_PCI_DOE_H
#define LINUX_PCI_DOE_H
struct pci_doe_protocol {
u16 vid;
u8 type;
};
struct pci_doe_mb;
/**
* struct pci_doe_task - represents a single query/response
*
* @prot: DOE Protocol
* @request_pl: The request payload
* @request_pl_sz: Size of the request payload (bytes)
* @response_pl: The response payload
* @response_pl_sz: Size of the response payload (bytes)
* @rv: Return value. Length of received response or error (bytes)
* @complete: Called when task is complete
* @private: Private data for the consumer
* @work: Used internally by the mailbox
* @doe_mb: Used internally by the mailbox
*
* The payload sizes and rv are specified in bytes with the following
* restrictions concerning the protocol.
*
* 1) The request_pl_sz must be a multiple of double words (4 bytes)
* 2) The response_pl_sz must be >= a single double word (4 bytes)
* 3) rv is returned as bytes but it will be a multiple of double words
*
* NOTE there is no need for the caller to initialize work or doe_mb.
*/
struct pci_doe_task {
struct pci_doe_protocol prot;
u32 *request_pl;
size_t request_pl_sz;
u32 *response_pl;
size_t response_pl_sz;
int rv;
void (*complete)(struct pci_doe_task *task);
void *private;
/* No need for the user to initialize these fields */
struct work_struct work;
struct pci_doe_mb *doe_mb;
};
/**
* pci_doe_for_each_off - Iterate each DOE capability
* @pdev: struct pci_dev to iterate
* @off: u16 of config space offset of each mailbox capability found
*/
#define pci_doe_for_each_off(pdev, off) \
for (off = pci_find_next_ext_capability(pdev, off, \
PCI_EXT_CAP_ID_DOE); \
off > 0; \
off = pci_find_next_ext_capability(pdev, off, \
PCI_EXT_CAP_ID_DOE))
struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset);
bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
#endif
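To illustrate how the pieces above fit together (a sketch modelled on the cxl_pci CDAT code earlier in this series, not an additional API; the example_* names are hypothetical), a caller packs dword payloads into a struct pci_doe_task, submits it, and waits for the ->complete() callback to fire:

static void example_doe_done(struct pci_doe_task *task)
{
	complete(task->private);
}

/* One synchronous DOE exchange; payloads are dword (u32) buffers */
static int example_doe_exchange(struct pci_doe_mb *mb, u16 vid, u8 type,
				u32 *req, size_t req_sz,
				u32 *rsp, size_t rsp_sz)
{
	DECLARE_COMPLETION_ONSTACK(c);
	struct pci_doe_task task = {
		.prot.vid = vid,
		.prot.type = type,
		.request_pl = req,
		.request_pl_sz = req_sz,
		.response_pl = rsp,
		.response_pl_sz = rsp_sz,
		.complete = example_doe_done,
		.private = &c,
	};
	int rc;

	if (!pci_doe_supports_prot(mb, vid, type))
		return -EOPNOTSUPP;

	rc = pci_doe_submit_task(mb, &task);
	if (rc < 0)
		return rc;

	wait_for_completion(&c);

	/* task.rv is the received response length in bytes, or an error */
	return task.rv;
}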
@@ -151,6 +151,7 @@
#define PCI_CLASS_OTHERS 0xff

/* Vendors and devices. Sort key: vendor first, device next. */

#define PCI_VENDOR_ID_PCI_SIG 0x0001

#define PCI_VENDOR_ID_LOONGSON 0x0014
......
@@ -235,6 +235,22 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size)
#define BIN_ATTR_RW(_name, _size) \
struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size)
#define __BIN_ATTR_ADMIN_RO(_name, _size) { \
.attr = { .name = __stringify(_name), .mode = 0400 }, \
.read = _name##_read, \
.size = _size, \
}
#define __BIN_ATTR_ADMIN_RW(_name, _size) \
__BIN_ATTR(_name, 0600, _name##_read, _name##_write, _size)
#define BIN_ATTR_ADMIN_RO(_name, _size) \
struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RO(_name, _size)
#define BIN_ATTR_ADMIN_RW(_name, _size) \
struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RW(_name, _size)
struct sysfs_ops {
	ssize_t (*show)(struct kobject *, struct attribute *, char *);
	ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
......
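For reference, the new BIN_ATTR_ADMIN_RO() helper is used exactly like BIN_ATTR_RO(), relying on the same _name##_read callback convention but forcing a root-only 0400 mode; the cxl_port CDAT attribute earlier in this series is the first user. A condensed, hypothetical example:

/* The macro expects a read callback named <attr>_read */
static ssize_t blob_read(struct file *filp, struct kobject *kobj,
			 struct bin_attribute *bin_attr, char *buf,
			 loff_t offset, size_t count)
{
	/* ... copy up to @count bytes at @offset into @buf ... */
	return 0;
}
/* Declares bin_attr_blob with mode 0400 (readable by root only) */
static BIN_ATTR_ADMIN_RO(blob, 0);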
@@ -737,7 +737,8 @@
#define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */
#define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */
#define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */
#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT
#define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */
#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE

#define PCI_EXT_CAP_DSN_SIZEOF 12
#define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
@@ -1103,4 +1104,30 @@
#define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0
#define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4
/* Data Object Exchange */
#define PCI_DOE_CAP 0x04 /* DOE Capabilities Register */
#define PCI_DOE_CAP_INT_SUP 0x00000001 /* Interrupt Support */
#define PCI_DOE_CAP_INT_MSG_NUM 0x00000ffe /* Interrupt Message Number */
#define PCI_DOE_CTRL 0x08 /* DOE Control Register */
#define PCI_DOE_CTRL_ABORT 0x00000001 /* DOE Abort */
#define PCI_DOE_CTRL_INT_EN 0x00000002 /* DOE Interrupt Enable */
#define PCI_DOE_CTRL_GO 0x80000000 /* DOE Go */
#define PCI_DOE_STATUS 0x0c /* DOE Status Register */
#define PCI_DOE_STATUS_BUSY 0x00000001 /* DOE Busy */
#define PCI_DOE_STATUS_INT_STATUS 0x00000002 /* DOE Interrupt Status */
#define PCI_DOE_STATUS_ERROR 0x00000004 /* DOE Error */
#define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */
#define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */
#define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */
/* DOE Data Object - note not actually registers */
#define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff
#define PCI_DOE_DATA_OBJECT_HEADER_1_TYPE 0x00ff0000
#define PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH 0x0003ffff
#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000
#endif /* LINUX_PCI_REGS_H */
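As a worked example of the field masks above (illustration only, not part of the header): composing the first header dword of a DOE discovery request with FIELD_PREP(), where the discovery protocol is vendor 0x0001 (PCI-SIG) and data object type 0:

#include <linux/bitfield.h>

static u32 example_doe_discovery_header(void)
{
	/* Header 1: vendor ID = PCI-SIG (0x0001), data object type = 0 */
	return FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, PCI_VENDOR_ID_PCI_SIG) |
	       FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, 0);
}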
@@ -489,8 +489,9 @@ int __weak page_is_ram(unsigned long pfn)
}
EXPORT_SYMBOL_GPL(page_is_ram);

static int __region_intersects(resource_size_t start, size_t size,
			unsigned long flags, unsigned long desc)
static int __region_intersects(struct resource *parent, resource_size_t start,
			size_t size, unsigned long flags,
			unsigned long desc)
{
	struct resource res;
	int type = 0; int other = 0;
@@ -499,7 +500,7 @@ static int __region_intersects(resource_size_t start, size_t size,
	res.start = start;
	res.end = start + size - 1;

	for (p = iomem_resource.child; p ; p = p->sibling) {
	for (p = parent->child; p ; p = p->sibling) {
		bool is_type = (((p->flags & flags) == flags) &&
				((desc == IORES_DESC_NONE) ||
				 (desc == p->desc)));
@@ -543,7 +544,7 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
	int ret;

	read_lock(&resource_lock);
	ret = __region_intersects(start, size, flags, desc);
	ret = __region_intersects(&iomem_resource, start, size, flags, desc);
	read_unlock(&resource_lock);

	return ret;
@@ -891,6 +892,13 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
	}
	write_unlock(&resource_lock);
}
/*
* Not for general consumption; only early boot memory map parsing, PCI
* resource discovery, and late discovery of CXL resources are expected
* to use this interface. The former are built in; only the latter,
* CXL, is a module.
*/
EXPORT_SYMBOL_NS_GPL(insert_resource_expand_to_fit, CXL);
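The export is deliberately namespaced, so only code that does MODULE_IMPORT_NS(CXL) can reach it from module context. A minimal sketch of the intended late-publish usage, with a made-up window and no error handling (not part of this patch):

static struct resource cxl_window0 = {
	.name  = "CXL Window 0",
	.start = 0x1000000000ULL,
	.end   = 0x10ffffffffULL,
	.flags = IORESOURCE_MEM,
};

static void publish_cxl_window(void)
{
	/* Grows the root resource if the window lies outside its current span */
	insert_resource_expand_to_fit(&iomem_resource, &cxl_window0);
}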
/** /**
* remove_resource - Remove a resource in the resource tree * remove_resource - Remove a resource in the resource tree
...@@ -1773,39 +1781,95 @@ void resource_list_free(struct list_head *head) ...@@ -1773,39 +1781,95 @@ void resource_list_free(struct list_head *head)
} }
EXPORT_SYMBOL(resource_list_free); EXPORT_SYMBOL(resource_list_free);
#ifdef CONFIG_DEVICE_PRIVATE #ifdef CONFIG_GET_FREE_REGION
static struct resource *__request_free_mem_region(struct device *dev, #define GFR_DESCENDING (1UL << 0)
struct resource *base, unsigned long size, const char *name) #define GFR_REQUEST_REGION (1UL << 1)
#define GFR_DEFAULT_ALIGN (1UL << PA_SECTION_SHIFT)
static resource_size_t gfr_start(struct resource *base, resource_size_t size,
resource_size_t align, unsigned long flags)
{
if (flags & GFR_DESCENDING) {
resource_size_t end;
end = min_t(resource_size_t, base->end,
(1ULL << MAX_PHYSMEM_BITS) - 1);
return end - size + 1;
}
return ALIGN(base->start, align);
}
static bool gfr_continue(struct resource *base, resource_size_t addr,
resource_size_t size, unsigned long flags)
{
if (flags & GFR_DESCENDING)
return addr > size && addr >= base->start;
/*
* In the ascending case, be careful that the last increment by
* @size did not wrap past 0: if it did, 'addr - size' underflows
* and the check below fails, ending the scan.
*/
return addr > addr - size &&
addr <= min_t(resource_size_t, base->end,
(1ULL << MAX_PHYSMEM_BITS) - 1);
}
static resource_size_t gfr_next(resource_size_t addr, resource_size_t size,
unsigned long flags)
{
if (flags & GFR_DESCENDING)
return addr - size;
return addr + size;
}
static void remove_free_mem_region(void *_res)
{
struct resource *res = _res;
if (res->parent)
remove_resource(res);
free_resource(res);
}
static struct resource *
get_free_mem_region(struct device *dev, struct resource *base,
resource_size_t size, const unsigned long align,
const char *name, const unsigned long desc,
const unsigned long flags)
{ {
resource_size_t end, addr; resource_size_t addr;
struct resource *res; struct resource *res;
struct region_devres *dr = NULL; struct region_devres *dr = NULL;
size = ALIGN(size, 1UL << PA_SECTION_SHIFT); size = ALIGN(size, align);
end = min_t(unsigned long, base->end, (1UL << MAX_PHYSMEM_BITS) - 1);
addr = end - size + 1UL;
res = alloc_resource(GFP_KERNEL); res = alloc_resource(GFP_KERNEL);
if (!res) if (!res)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
if (dev) { if (dev && (flags & GFR_REQUEST_REGION)) {
dr = devres_alloc(devm_region_release, dr = devres_alloc(devm_region_release,
sizeof(struct region_devres), GFP_KERNEL); sizeof(struct region_devres), GFP_KERNEL);
if (!dr) { if (!dr) {
free_resource(res); free_resource(res);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
} else if (dev) {
if (devm_add_action_or_reset(dev, remove_free_mem_region, res))
return ERR_PTR(-ENOMEM);
} }
write_lock(&resource_lock); write_lock(&resource_lock);
for (; addr > size && addr >= base->start; addr -= size) { for (addr = gfr_start(base, size, align, flags);
if (__region_intersects(addr, size, 0, IORES_DESC_NONE) != gfr_continue(base, addr, size, flags);
addr = gfr_next(addr, size, flags)) {
if (__region_intersects(base, addr, size, 0, IORES_DESC_NONE) !=
REGION_DISJOINT) REGION_DISJOINT)
continue; continue;
if (__request_region_locked(res, &iomem_resource, addr, size, if (flags & GFR_REQUEST_REGION) {
name, 0)) if (__request_region_locked(res, &iomem_resource, addr,
size, name, 0))
break; break;
if (dev) { if (dev) {
...@@ -1815,20 +1879,41 @@ static struct resource *__request_free_mem_region(struct device *dev, ...@@ -1815,20 +1879,41 @@ static struct resource *__request_free_mem_region(struct device *dev,
devres_add(dev, dr); devres_add(dev, dr);
} }
res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY; res->desc = desc;
write_unlock(&resource_lock); write_unlock(&resource_lock);
/* /*
* A driver is claiming this region so revoke any mappings. * A driver is claiming this region so revoke any
* mappings.
*/ */
revoke_iomem(res); revoke_iomem(res);
} else {
res->start = addr;
res->end = addr + size - 1;
res->name = name;
res->desc = desc;
res->flags = IORESOURCE_MEM;
/*
* Only succeed if the resource hosts an exclusive
* range after the insert
*/
if (__insert_resource(base, res) || res->child)
break;
write_unlock(&resource_lock);
}
return res; return res;
} }
write_unlock(&resource_lock); write_unlock(&resource_lock);
if (flags & GFR_REQUEST_REGION) {
free_resource(res); free_resource(res);
if (dr)
devres_free(dr); devres_free(dr);
} else if (dev)
devm_release_action(dev, remove_free_mem_region, res);
return ERR_PTR(-ERANGE); return ERR_PTR(-ERANGE);
} }
...@@ -1847,18 +1932,48 @@ static struct resource *__request_free_mem_region(struct device *dev, ...@@ -1847,18 +1932,48 @@ static struct resource *__request_free_mem_region(struct device *dev,
struct resource *devm_request_free_mem_region(struct device *dev, struct resource *devm_request_free_mem_region(struct device *dev,
struct resource *base, unsigned long size) struct resource *base, unsigned long size)
{ {
return __request_free_mem_region(dev, base, size, dev_name(dev)); unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;
return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
dev_name(dev),
IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
} }
EXPORT_SYMBOL_GPL(devm_request_free_mem_region); EXPORT_SYMBOL_GPL(devm_request_free_mem_region);
struct resource *request_free_mem_region(struct resource *base, struct resource *request_free_mem_region(struct resource *base,
unsigned long size, const char *name) unsigned long size, const char *name)
{ {
return __request_free_mem_region(NULL, base, size, name); unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;
return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN, name,
IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
} }
EXPORT_SYMBOL_GPL(request_free_mem_region); EXPORT_SYMBOL_GPL(request_free_mem_region);
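Both wrappers above keep the historical ZONE_DEVICE behavior: scan downward from the top of the physical address map and fully request the region. A minimal caller sketch, assuming a driver carving out 1GB of device-private space (the size is illustrative only):

	struct resource *res;

	res = devm_request_free_mem_region(dev, &iomem_resource, SZ_1G);
	if (IS_ERR(res))
		return PTR_ERR(res);
	/* res->start..res->end now carries IORES_DESC_DEVICE_PRIVATE_MEMORY */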
#endif /* CONFIG_DEVICE_PRIVATE */ /**
* alloc_free_mem_region - find a free region relative to @base
* @base: resource that will parent the new resource
* @size: size in bytes of memory to allocate from @base
* @align: alignment requirements for the allocation
* @name: resource name
*
* Buses like CXL, which can dynamically instantiate new memory regions,
* need a method to allocate physical address space for those regions.
* Allocate and insert a new resource to cover a free range in the span
* of @base that is not already claimed by a descendant of @base.
*/
struct resource *alloc_free_mem_region(struct resource *base,
unsigned long size, unsigned long align,
const char *name)
{
/* Defaults: ascending allocation direction, insert (rather than request) the resource */
unsigned long flags = 0;
return get_free_mem_region(NULL, base, size, align, name,
IORES_DESC_NONE, flags);
}
EXPORT_SYMBOL_NS_GPL(alloc_free_mem_region, CXL);
#endif /* CONFIG_GET_FREE_REGION */
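By contrast with the request-based wrappers, alloc_free_mem_region() scans upward and inserts the new resource into @base rather than requesting it as busy. A hedged sketch of the expected CXL usage; 'cxlrd_res' stands in for a root decoder's window resource and is not a symbol introduced here:

	struct resource *hpa;

	hpa = alloc_free_mem_region(cxlrd_res, SZ_256M, SZ_256M, "region0");
	if (IS_ERR(hpa))
		return PTR_ERR(hpa);
	/* 'hpa' is now a child of cxlrd_res covering a previously free range */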
static int __init strict_iomem(char *str) static int __init strict_iomem(char *str)
{ {
......
...@@ -983,9 +983,14 @@ config HMM_MIRROR ...@@ -983,9 +983,14 @@ config HMM_MIRROR
bool bool
depends on MMU depends on MMU
config GET_FREE_REGION
depends on SPARSEMEM
bool
config DEVICE_PRIVATE config DEVICE_PRIVATE
bool "Unaddressable device memory (GPU memory, ...)" bool "Unaddressable device memory (GPU memory, ...)"
depends on ZONE_DEVICE depends on ZONE_DEVICE
select GET_FREE_REGION
help help
Allows creation of struct pages to represent unaddressable device Allows creation of struct pages to represent unaddressable device
......
...@@ -47,6 +47,7 @@ cxl_core-y += $(CXL_CORE_SRC)/memdev.o ...@@ -47,6 +47,7 @@ cxl_core-y += $(CXL_CORE_SRC)/memdev.o
cxl_core-y += $(CXL_CORE_SRC)/mbox.o cxl_core-y += $(CXL_CORE_SRC)/mbox.o
cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/pci.o
cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
cxl_core-y += config_check.o cxl_core-y += config_check.o
obj-m += test/ obj-m += test/
...@@ -208,13 +208,15 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) ...@@ -208,13 +208,15 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
} }
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL);
bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
struct cxl_hdm *cxlhdm) struct cxl_hdm *cxlhdm)
{ {
int rc = 0, index; int rc = 0, index;
struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
if (!ops || !ops->is_mock_dev(cxlds->dev)) if (ops && ops->is_mock_dev(cxlds->dev))
rc = 0;
else
rc = cxl_hdm_decode_init(cxlds, cxlhdm); rc = cxl_hdm_decode_init(cxlds, cxlhdm);
put_cxl_mock_ops(index); put_cxl_mock_ops(index);
......