From: Alejandro Lucero Differentiate CXL memory expanders (type 3) from CXL device accelerators (type 2) with a new function for initializing cxl_dev_state and a macro for helping accel drivers to embed cxl_dev_state inside a private struct. Move structs to include/cxl as the size of the accel driver private struct embedding cxl_dev_state needs to know the size of this struct. Use same new initialization with the type3 pci driver. Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield --- .../driver-api/cxl/theory-of-operation.rst | 3 + drivers/cxl/core/mbox.c | 12 +- drivers/cxl/core/memdev.c | 31 +++ drivers/cxl/core/pci.c | 1 + drivers/cxl/core/regs.c | 1 + drivers/cxl/cxl.h | 97 +------- drivers/cxl/cxlmem.h | 86 +------ drivers/cxl/pci.c | 16 +- include/cxl/cxl.h | 226 ++++++++++++++++++ include/cxl/pci.h | 23 ++ tools/testing/cxl/test/mem.c | 3 +- 11 files changed, 304 insertions(+), 195 deletions(-) create mode 100644 include/cxl/cxl.h create mode 100644 include/cxl/pci.h diff --git a/Documentation/driver-api/cxl/theory-of-operation.rst b/Documentation/driver-api/cxl/theory-of-operation.rst index 257f513e320c..ab8ebe7722a9 100644 --- a/Documentation/driver-api/cxl/theory-of-operation.rst +++ b/Documentation/driver-api/cxl/theory-of-operation.rst @@ -347,6 +347,9 @@ CXL Core .. kernel-doc:: drivers/cxl/cxl.h :internal: +.. kernel-doc:: include/cxl/cxl.h + :internal: + .. 
kernel-doc:: drivers/cxl/acpi.c :identifiers: add_cxl_resources diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index fa6dd0c94656..bee84d0101d1 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1514,23 +1514,21 @@ int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL"); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec) { struct cxl_memdev_state *mds; int rc; - mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); + mds = devm_cxl_dev_state_create(dev, CXL_DEVTYPE_CLASSMEM, serial, + dvsec, struct cxl_memdev_state, cxlds, + true); if (!mds) { dev_err(dev, "No memory available\n"); return ERR_PTR(-ENOMEM); } mutex_init(&mds->event.log_lock); - mds->cxlds.dev = dev; - mds->cxlds.reg_map.host = dev; - mds->cxlds.cxl_mbox.host = dev; - mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; - mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier); if (rc == -EOPNOTSUPP) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 7927d198b887..97127d6067c4 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -621,6 +621,37 @@ static void detach_memdev(struct work_struct *work) static struct lock_class_key cxl_memdev_key; +static void cxl_dev_state_init(struct cxl_dev_state *cxlds, struct device *dev, + enum cxl_devtype type, u64 serial, u16 dvsec, + bool has_mbox) +{ + *cxlds = (struct cxl_dev_state) { + .dev = dev, + .type = type, + .serial = serial, + .cxl_dvsec = dvsec, + .reg_map.host = dev, + .reg_map.resource = CXL_RESOURCE_NONE, + }; + + if (has_mbox) + cxlds->cxl_mbox.host = dev; +} + +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox) +{ + struct cxl_dev_state *cxlds = 
devm_kzalloc(dev, size, GFP_KERNEL); + + if (!cxlds) + return NULL; + + cxl_dev_state_init(cxlds, dev, type, serial, dvsec, has_mbox); + return cxlds; +} +EXPORT_SYMBOL_NS_GPL(_devm_cxl_dev_state_create, "CXL"); struct cxl_memdev *devm_cxl_memdev_add_or_reset(struct device *host, struct cxl_memdev *cxlmd) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index d677691f8a05..d3e1ed46b42d 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index fb70ffbba72d..dee572775913 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 32fccad9a7f6..db8e74c55309 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -11,6 +11,7 @@ #include #include #include +#include extern const struct nvdimm_security_ops *cxl_security_ops; @@ -200,97 +201,6 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48) #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20 -/* - * Using struct_group() allows for per register-block-type helper routines, - * without requiring block-type agnostic code to include the prefix. 
- */ -struct cxl_regs { - /* - * Common set of CXL Component register block base pointers - * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure - * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure - */ - struct_group_tagged(cxl_component_regs, component, - void __iomem *hdm_decoder; - void __iomem *ras; - ); - /* - * Common set of CXL Device register block base pointers - * @status: CXL 2.0 8.2.8.3 Device Status Registers - * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers - * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers - */ - struct_group_tagged(cxl_device_regs, device_regs, - void __iomem *status, *mbox, *memdev; - ); - - struct_group_tagged(cxl_pmu_regs, pmu_regs, - void __iomem *pmu; - ); - - /* - * RCH downstream port specific RAS register - * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB - */ - struct_group_tagged(cxl_rch_regs, rch_regs, - void __iomem *dport_aer; - ); - - /* - * RCD upstream port specific PCIe cap register - * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB - */ - struct_group_tagged(cxl_rcd_regs, rcd_regs, - void __iomem *rcd_pcie_cap; - ); -}; - -struct cxl_reg_map { - bool valid; - int id; - unsigned long offset; - unsigned long size; -}; - -struct cxl_component_reg_map { - struct cxl_reg_map hdm_decoder; - struct cxl_reg_map ras; -}; - -struct cxl_device_reg_map { - struct cxl_reg_map status; - struct cxl_reg_map mbox; - struct cxl_reg_map memdev; -}; - -struct cxl_pmu_reg_map { - struct cxl_reg_map pmu; -}; - -/** - * struct cxl_register_map - DVSEC harvested register block mapping parameters - * @host: device for devm operations and logging - * @base: virtual base of the register-block-BAR + @block_offset - * @resource: physical resource base of the register block - * @max_size: maximum mapping size to perform register search - * @reg_type: see enum cxl_regloc_type - * @component_map: cxl_reg_map for component registers - * @device_map: cxl_reg_maps for device registers - * @pmu_map: cxl_reg_maps for CXL 
Performance Monitoring Units - */ -struct cxl_register_map { - struct device *host; - void __iomem *base; - resource_size_t resource; - resource_size_t max_size; - u8 reg_type; - union { - struct cxl_component_reg_map component_map; - struct cxl_device_reg_map device_map; - struct cxl_pmu_reg_map pmu_map; - }; -}; - void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); void cxl_probe_device_regs(struct device *dev, void __iomem *base, @@ -484,11 +394,6 @@ struct cxl_region_params { resource_size_t cache_size; }; -enum cxl_partition_mode { - CXL_PARTMODE_RAM, - CXL_PARTMODE_PMEM, -}; - /* * Indicate whether this region has been assembled by autodetection or * userspace assembly. Prevent endpoint decoders outside of automatic diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 82e8188c76a0..86aa4899d511 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include "cxl.h" @@ -373,87 +373,6 @@ struct cxl_security_state { struct kernfs_node *sanitize_node; }; -/* - * enum cxl_devtype - delineate type-2 from a generic type-3 device - * @CXL_DEVTYPE_DEVMEM - Vendor specific CXL Type-2 device implementing HDM-D or - * HDM-DB, no requirement that this device implements a - * mailbox, or other memory-device-standard manageability - * flows. - * @CXL_DEVTYPE_CLASSMEM - Common class definition of a CXL Type-3 device with - * HDM-H and class-mandatory memory device registers - */ -enum cxl_devtype { - CXL_DEVTYPE_DEVMEM, - CXL_DEVTYPE_CLASSMEM, -}; - -/** - * struct cxl_dpa_perf - DPA performance property entry - * @dpa_range: range for DPA address - * @coord: QoS performance data (i.e. 
latency, bandwidth) - * @cdat_coord: raw QoS performance data from CDAT - * @qos_class: QoS Class cookies - */ -struct cxl_dpa_perf { - struct range dpa_range; - struct access_coordinate coord[ACCESS_COORDINATE_MAX]; - struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; - int qos_class; -}; - -/** - * struct cxl_dpa_partition - DPA partition descriptor - * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) - * @perf: performance attributes of the partition from CDAT - * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... - */ -struct cxl_dpa_partition { - struct resource res; - struct cxl_dpa_perf perf; - enum cxl_partition_mode mode; -}; - -/** - * struct cxl_dev_state - The driver device state - * - * cxl_dev_state represents the CXL driver/device state. It provides an - * interface to mailbox commands as well as some cached data about the device. - * Currently only memory devices are represented. - * - * @dev: The device associated with this CXL state - * @cxlmd: The device representing the CXL.mem capabilities of @dev - * @reg_map: component and ras register mapping parameters - * @regs: Parsed register blocks - * @cxl_dvsec: Offset to the PCIe device DVSEC - * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) - * @media_ready: Indicate whether the device media is usable - * @dpa_res: Overall DPA resource tree for the device - * @part: DPA partition array - * @nr_partitions: Number of DPA partitions - * @serial: PCIe Device Serial Number - * @type: Generic Memory Class device or Vendor Specific Memory device - * @cxl_mbox: CXL mailbox context - * @cxlfs: CXL features context - */ -struct cxl_dev_state { - struct device *dev; - struct cxl_memdev *cxlmd; - struct cxl_register_map reg_map; - struct cxl_regs regs; - int cxl_dvsec; - bool rcd; - bool media_ready; - struct resource dpa_res; - struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; - unsigned int nr_partitions; - u64 serial; - 
enum cxl_devtype type; - struct cxl_mailbox cxl_mbox; -#ifdef CONFIG_CXL_FEATURES - struct cxl_features_state *cxlfs; -#endif -}; - static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) { /* @@ -858,7 +777,8 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_memdev_state *mds); int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev); +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 0a3108d552c8..65d0d8fc7e99 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include "cxlmem.h" #include "cxlpci.h" @@ -912,6 +914,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) int rc, pmu_count; unsigned int i; bool irq_avail; + u16 dvsec; /* * Double check the anonymous union trickery in struct cxl_regs @@ -925,19 +928,18 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; pci_set_master(pdev); - mds = cxl_memdev_state_create(&pdev->dev); + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + CXL_DVSEC_PCIE_DEVICE); + if (!dvsec) + pci_warn(pdev, "Device DVSEC not present, skip CXL.mem init\n"); + + mds = cxl_memdev_state_create(&pdev->dev, pci_get_dsn(pdev), dvsec); if (IS_ERR(mds)) return PTR_ERR(mds); cxlds = &mds->cxlds; pci_set_drvdata(pdev, cxlds); cxlds->rcd = is_cxl_restricted(pdev); - cxlds->serial = pci_get_dsn(pdev); - cxlds->cxl_dvsec = pci_find_dvsec_capability( - pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE); - if (!cxlds->cxl_dvsec) - 
dev_warn(&pdev->dev, - "Device DVSEC not present, skip CXL.mem init\n"); rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h new file mode 100644 index 000000000000..13d448686189 --- /dev/null +++ b/include/cxl/cxl.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. */ +/* Copyright(c) 2025 Advanced Micro Devices, Inc. */ + +#ifndef __CXL_CXL_H__ +#define __CXL_CXL_H__ + +#include +#include +#include + +/** + * enum cxl_devtype - delineate type-2 from a generic type-3 device + * @CXL_DEVTYPE_DEVMEM: Vendor specific CXL Type-2 device implementing HDM-D or + * HDM-DB, no requirement that this device implements a + * mailbox, or other memory-device-standard manageability + * flows. + * @CXL_DEVTYPE_CLASSMEM: Common class definition of a CXL Type-3 device with + * HDM-H and class-mandatory memory device registers + */ +enum cxl_devtype { + CXL_DEVTYPE_DEVMEM, + CXL_DEVTYPE_CLASSMEM, +}; + +struct device; + +/* + * Using struct_group() allows for per register-block-type helper routines, + * without requiring block-type agnostic code to include the prefix. 
+ */ +struct cxl_regs { + /* + * Common set of CXL Component register block base pointers + * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure + * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure + */ + struct_group_tagged(cxl_component_regs, component, + void __iomem *hdm_decoder; + void __iomem *ras; + ); + /* + * Common set of CXL Device register block base pointers + * @status: CXL 2.0 8.2.8.3 Device Status Registers + * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers + * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers + */ + struct_group_tagged(cxl_device_regs, device_regs, + void __iomem *status, *mbox, *memdev; + ); + + struct_group_tagged(cxl_pmu_regs, pmu_regs, + void __iomem *pmu; + ); + + /* + * RCH downstream port specific RAS register + * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB + */ + struct_group_tagged(cxl_rch_regs, rch_regs, + void __iomem *dport_aer; + ); + + /* + * RCD upstream port specific PCIe cap register + * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB + */ + struct_group_tagged(cxl_rcd_regs, rcd_regs, + void __iomem *rcd_pcie_cap; + ); +}; + +struct cxl_reg_map { + bool valid; + int id; + unsigned long offset; + unsigned long size; +}; + +struct cxl_component_reg_map { + struct cxl_reg_map hdm_decoder; + struct cxl_reg_map ras; +}; + +struct cxl_device_reg_map { + struct cxl_reg_map status; + struct cxl_reg_map mbox; + struct cxl_reg_map memdev; +}; + +struct cxl_pmu_reg_map { + struct cxl_reg_map pmu; +}; + +/** + * struct cxl_register_map - DVSEC harvested register block mapping parameters + * @host: device for devm operations and logging + * @base: virtual base of the register-block-BAR + @block_offset + * @resource: physical resource base of the register block + * @max_size: maximum mapping size to perform register search + * @reg_type: see enum cxl_regloc_type + * @component_map: cxl_reg_map for component registers + * @device_map: cxl_reg_maps for device registers + * @pmu_map: cxl_reg_maps for CXL 
Performance Monitoring Units + */ +struct cxl_register_map { + struct device *host; + void __iomem *base; + resource_size_t resource; + resource_size_t max_size; + u8 reg_type; + union { + struct cxl_component_reg_map component_map; + struct cxl_device_reg_map device_map; + struct cxl_pmu_reg_map pmu_map; + }; +}; + +/** + * struct cxl_dpa_perf - DPA performance property entry + * @dpa_range: range for DPA address + * @coord: QoS performance data (i.e. latency, bandwidth) + * @cdat_coord: raw QoS performance data from CDAT + * @qos_class: QoS Class cookies + */ +struct cxl_dpa_perf { + struct range dpa_range; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; + int qos_class; +}; + +enum cxl_partition_mode { + CXL_PARTMODE_RAM, + CXL_PARTMODE_PMEM, +}; + +/** + * struct cxl_dpa_partition - DPA partition descriptor + * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) + * @perf: performance attributes of the partition from CDAT + * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... + */ +struct cxl_dpa_partition { + struct resource res; + struct cxl_dpa_perf perf; + enum cxl_partition_mode mode; +}; + +#define CXL_NR_PARTITIONS_MAX 2 + +/** + * struct cxl_dev_state - The driver device state + * + * cxl_dev_state represents the CXL driver/device state. It provides an + * interface to mailbox commands as well as some cached data about the device. + * Currently only memory devices are represented. 
+ * + * @dev: The device associated with this CXL state + * @cxlmd: The device representing the CXL.mem capabilities of @dev + * @reg_map: component and ras register mapping parameters + * @regs: Parsed register blocks + * @cxl_dvsec: Offset to the PCIe device DVSEC + * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) + * @media_ready: Indicate whether the device media is usable + * @dpa_res: Overall DPA resource tree for the device + * @part: DPA partition array + * @nr_partitions: Number of DPA partitions + * @serial: PCIe Device Serial Number + * @type: Generic Memory Class device or Vendor Specific Memory device + * @cxl_mbox: CXL mailbox context + * @cxlfs: CXL features context + */ +struct cxl_dev_state { + /* public for Type2 drivers */ + struct device *dev; + struct cxl_memdev *cxlmd; + + /* private for Type2 drivers */ + struct cxl_register_map reg_map; + struct cxl_regs regs; + int cxl_dvsec; + bool rcd; + bool media_ready; + struct resource dpa_res; + struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; + unsigned int nr_partitions; + u64 serial; + enum cxl_devtype type; + struct cxl_mailbox cxl_mbox; +#ifdef CONFIG_CXL_FEATURES + struct cxl_features_state *cxlfs; +#endif +}; + +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox); + +/** + * devm_cxl_dev_state_create - safely create and cast a cxl dev state embedded in a + * driver specific struct. + * + * @parent: device behind the request + * @type: CXL device type + * @serial: device identification + * @dvsec: dvsec capability offset + * @drv_struct: driver struct embedding a cxl_dev_state struct + * @member: drv_struct member as cxl_dev_state + * @mbox: true if mailbox supported + * + * Return: a pointer to the newly allocated drv_struct with its embedded + * cxl_dev_state initialized, or NULL if the allocation fails. + * + * Introduced for Type2 driver support.
+ */ +#define devm_cxl_dev_state_create(parent, type, serial, dvsec, drv_struct, member, mbox) \ + ({ \ + static_assert(__same_type(struct cxl_dev_state, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ + sizeof(drv_struct), mbox); \ + }) +#endif /* __CXL_CXL_H__ */ diff --git a/include/cxl/pci.h b/include/cxl/pci.h new file mode 100644 index 000000000000..5729a93b252a --- /dev/null +++ b/include/cxl/pci.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ + +#ifndef __CXL_CXL_PCI_H__ +#define __CXL_CXL_PCI_H__ + +/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ +#define CXL_DVSEC_PCIE_DEVICE 0 +#define CXL_DVSEC_CAP_OFFSET 0xA +#define CXL_DVSEC_MEM_CAPABLE BIT(2) +#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) +#define CXL_DVSEC_CTRL_OFFSET 0xC +#define CXL_DVSEC_MEM_ENABLE BIT(2) +#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + ((i) * 0x10)) +#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + ((i) * 0x10)) +#define CXL_DVSEC_MEM_INFO_VALID BIT(0) +#define CXL_DVSEC_MEM_ACTIVE BIT(1) +#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) +#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + ((i) * 0x10)) +#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + ((i) * 0x10)) +#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) + +#endif diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 3d553661ca75..369907048362 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1717,7 +1717,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mds = cxl_memdev_state_create(dev); + mds = cxl_memdev_state_create(dev, pdev->id + 1, 0); if (IS_ERR(mds)) return PTR_ERR(mds); @@ -1733,7 +1733,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; 
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); - cxlds->serial = pdev->id + 1; if (is_rcd(pdev)) cxlds->rcd = true; -- 2.34.1 From: Alejandro Lucero Add CXL initialization based on new CXL API for accel drivers and make it dependent on kernel CXL configuration. Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Acked-by: Edward Cree Reviewed-by: Alison Schofield Reviewed-by: Dan Williams --- drivers/net/ethernet/sfc/Kconfig | 9 +++++ drivers/net/ethernet/sfc/Makefile | 1 + drivers/net/ethernet/sfc/efx.c | 15 ++++++- drivers/net/ethernet/sfc/efx_cxl.c | 57 +++++++++++++++++++++++++++ drivers/net/ethernet/sfc/efx_cxl.h | 40 +++++++++++++++++++ drivers/net/ethernet/sfc/net_driver.h | 10 +++++ 6 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/sfc/efx_cxl.c create mode 100644 drivers/net/ethernet/sfc/efx_cxl.h diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index c4c43434f314..979f2801e2a8 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -66,6 +66,15 @@ config SFC_MCDI_LOGGING Driver-Interface) commands and responses, allowing debugging of driver/firmware interaction. The tracing is actually enabled by a sysfs file 'mcdi_logging' under the PCI device. +config SFC_CXL + bool "Solarflare SFC9100-family CXL support" + depends on SFC && CXL_BUS >= SFC + default SFC + help + This enables SFC CXL support if the kernel is configuring CXL for + using CTPIO with CXL.mem. The SFC device with CXL support and + with a CXL-aware firmware can be used for minimizing latencies + when sending through CTPIO. 
source "drivers/net/ethernet/sfc/falcon/Kconfig" source "drivers/net/ethernet/sfc/siena/Kconfig" diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index d99039ec468d..bb0f1891cde6 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -13,6 +13,7 @@ sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \ mae.o tc.o tc_bindings.o tc_counters.o \ tc_encap_actions.o tc_conntrack.o +sfc-$(CONFIG_SFC_CXL) += efx_cxl.o obj-$(CONFIG_SFC) += sfc.o obj-$(CONFIG_SFC_FALCON) += falcon/ diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 112e55b98ed3..537668278375 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -34,6 +34,7 @@ #include "selftest.h" #include "sriov.h" #include "efx_devlink.h" +#include "efx_cxl.h" #include "mcdi_port_common.h" #include "mcdi_pcol.h" @@ -981,12 +982,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_pci_remove_main(efx); efx_fini_io(efx); + + probe_data = container_of(efx, struct efx_probe_data, efx); + efx_cxl_exit(probe_data); + pci_dbg(efx->pci_dev, "shutdown successful\n"); efx_fini_devlink_and_unlock(efx); efx_fini_struct(efx); free_netdev(efx->net_dev); - probe_data = container_of(efx, struct efx_probe_data, efx); kfree(probe_data); }; @@ -1190,6 +1194,15 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (rc) goto fail2; + /* A successful cxl initialization implies a CXL region created to be + * used for PIO buffers. If there is no CXL support, or initialization + * fails, probe_data->cxl_pio_initialised will be false and legacy PIO buffers + * defined at specific PCI BAR regions will be used. + */ + rc = efx_cxl_init(probe_data); + if (rc) + pci_err(pci_dev, "CXL initialization failed with error %d\n", rc); + rc = efx_pci_probe_post_io(efx); if (rc) { /* On failure, retry once immediately.
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c new file mode 100644 index 000000000000..56d148318636 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ + +#include +#include + +#include "net_driver.h" +#include "efx_cxl.h" + +#define EFX_CTPIO_BUFFER_SIZE SZ_256M + +int efx_cxl_init(struct efx_probe_data *probe_data) +{ + struct efx_nic *efx = &probe_data->efx; + struct pci_dev *pci_dev = efx->pci_dev; + struct efx_cxl *cxl; + u16 dvsec; + + probe_data->cxl_pio_initialised = false; + + /* Is the device configured with and using CXL? */ + if (!pcie_is_cxl(pci_dev)) + return 0; + + dvsec = pci_find_dvsec_capability(pci_dev, PCI_VENDOR_ID_CXL, + CXL_DVSEC_PCIE_DEVICE); + if (!dvsec) { + pci_err(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability not found\n"); + return 0; + } + + pci_dbg(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability found\n"); + + /* Create a cxl_dev_state embedded in the cxl struct using cxl core api + * specifying no mbox available. + */ + cxl = devm_cxl_dev_state_create(&pci_dev->dev, CXL_DEVTYPE_DEVMEM, + pci_dev->dev.id, dvsec, struct efx_cxl, + cxlds, false); + + if (!cxl) + return -ENOMEM; + + probe_data->cxl = cxl; + + return 0; +} + +void efx_cxl_exit(struct efx_probe_data *probe_data) +{ +} + +MODULE_IMPORT_NS("CXL"); diff --git a/drivers/net/ethernet/sfc/efx_cxl.h b/drivers/net/ethernet/sfc/efx_cxl.h new file mode 100644 index 000000000000..961639cef692 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_cxl.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_CXL_H +#define EFX_CXL_H + +#ifdef CONFIG_SFC_CXL + +#include + +struct cxl_root_decoder; +struct cxl_port; +struct cxl_endpoint_decoder; +struct cxl_region; +struct efx_probe_data; + +struct efx_cxl { + struct cxl_dev_state cxlds; + struct cxl_memdev *cxlmd; + struct cxl_root_decoder *cxlrd; + struct cxl_port *endpoint; + struct cxl_endpoint_decoder *cxled; + struct cxl_region *efx_region; + void __iomem *ctpio_cxl; +}; + +int efx_cxl_init(struct efx_probe_data *probe_data); +void efx_cxl_exit(struct efx_probe_data *probe_data); +#else +static inline int efx_cxl_init(struct efx_probe_data *probe_data) { return 0; } +static inline void efx_cxl_exit(struct efx_probe_data *probe_data) {} +#endif +#endif diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 5c0f306fb019..0e685b8a9980 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1199,14 +1199,24 @@ struct efx_nic { atomic_t n_rx_noskb_drops; }; +#ifdef CONFIG_SFC_CXL +struct efx_cxl; +#endif + /** * struct efx_probe_data - State after hardware probe * @pci_dev: The PCI device * @efx: Efx NIC details + * @cxl: details of related cxl objects + * @cxl_pio_initialised: cxl initialization outcome. */ struct efx_probe_data { struct pci_dev *pci_dev; struct efx_nic efx; +#ifdef CONFIG_SFC_CXL + struct efx_cxl *cxl; + bool cxl_pio_initialised; +#endif }; static inline struct efx_nic *efx_netdev_priv(struct net_device *dev) -- 2.34.1 From: Alejandro Lucero Inside cxl/core/pci.c there are helpers for CXL PCIe initialization meanwhile cxl/pci.c implements the functionality for a Type3 device initialization. 
Move helper functions from cxl/pci.c to cxl/core/pci.c so that they can be exported and shared with CXL Type2 device initialization. Fix cxl mock tests affected by the code move, deleting a function that has been unused since commit 733b57f262b0 ("cxl/pci: Early setup RCH dport component registers from RCRB"). Signed-off-by: Alejandro Lucero Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham Reviewed-by: Fan Ni Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Reviewed-by: Dan Williams --- drivers/cxl/core/core.h | 2 + drivers/cxl/core/pci.c | 62 +++++++++++++++++++++++++++++++ drivers/cxl/core/regs.c | 1 - drivers/cxl/cxl.h | 2 - drivers/cxl/cxlpci.h | 13 +++++++ drivers/cxl/pci.c | 70 ----------------------------------- include/cxl/pci.h | 2 + tools/testing/cxl/Kbuild | 1 - tools/testing/cxl/test/mock.c | 17 --------- 9 files changed, 79 insertions(+), 91 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 385bfd38b778..d96213c02fd6 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -207,4 +207,6 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, u16 *return_code); #endif +resource_size_t cxl_rcd_component_reg_phys(struct device *dev, + struct cxl_dport *dport); #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index d3e1ed46b42d..4e5688e7e972 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -728,6 +728,68 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL"); +static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, + struct cxl_register_map *map, + struct cxl_dport *dport) +{ + resource_size_t component_reg_phys; + + *map = (struct cxl_register_map) { + .host = &pdev->dev, + .resource = CXL_RESOURCE_NONE, + }; + + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + +
component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); + if (component_reg_phys == CXL_RESOURCE_NONE) + return -ENXIO; + + map->resource = component_reg_phys; + map->reg_type = CXL_REGLOC_RBI_COMPONENT; + map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; + + return 0; +} + +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + int rc; + + rc = cxl_find_regblock(pdev, type, map); + + /* + * If the Register Locator DVSEC does not exist, check if it + * is an RCH and try to extract the Component Registers from + * an RCRB. + */ + if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { + struct cxl_dport *dport; + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + rc = cxl_rcrb_get_comp_regs(pdev, map, dport); + if (rc) + return rc; + + rc = cxl_dport_map_rcd_linkcap(pdev, dport); + if (rc) + return rc; + + } else if (rc) { + return rc; + } + + return cxl_setup_regs(map); +} +EXPORT_SYMBOL_NS_GPL(cxl_pci_setup_regs, "CXL"); + int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) { int speed, bw; diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index dee572775913..dcf444f1fe48 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -642,4 +642,3 @@ resource_size_t cxl_rcd_component_reg_phys(struct device *dev, return CXL_RESOURCE_NONE; return __rcrb_to_component(dev, &dport->rcrb, CXL_RCRB_UPSTREAM); } -EXPORT_SYMBOL_NS_GPL(cxl_rcd_component_reg_phys, "CXL"); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index db8e74c55309..e197c36c7525 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -221,8 +221,6 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); int cxl_setup_regs(struct cxl_register_map *map); struct cxl_dport; -resource_size_t cxl_rcd_component_reg_phys(struct device *dev, - struct cxl_dport *dport); 
int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_RESOURCE_NONE ((resource_size_t) -1) diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index ccf0ca36bc00..4b11757a46ab 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -74,9 +74,22 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) return lnksta2 & PCI_EXP_LNKSTA2_FLIT; } +/* + * Assume that the caller has already validated that @pdev has CXL + * capabilities, any RCIEp with CXL capabilities is treated as a + * Restricted CXL Device (RCD) and finds upstream port and endpoint + * registers in a Root Complex Register Block (RCRB). + */ +static inline bool is_cxl_restricted(struct pci_dev *pdev) +{ + return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; +} + int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info); void read_cdat_data(struct cxl_port *port); +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 65d0d8fc7e99..d556e8be1155 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -468,76 +468,6 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) return 0; } -/* - * Assume that any RCIEP that emits the CXL memory expander class code - * is an RCD - */ -static bool is_cxl_restricted(struct pci_dev *pdev) -{ - return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; -} - -static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, - struct cxl_register_map *map, - struct cxl_dport *dport) -{ - resource_size_t component_reg_phys; - - *map = (struct cxl_register_map) { - .host = &pdev->dev, - .resource = CXL_RESOURCE_NONE, - }; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - 
component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); - if (component_reg_phys == CXL_RESOURCE_NONE) - return -ENXIO; - - map->resource = component_reg_phys; - map->reg_type = CXL_REGLOC_RBI_COMPONENT; - map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; - - return 0; -} - -static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) -{ - int rc; - - rc = cxl_find_regblock(pdev, type, map); - - /* - * If the Register Locator DVSEC does not exist, check if it - * is an RCH and try to extract the Component Registers from - * an RCRB. - */ - if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { - struct cxl_dport *dport; - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - rc = cxl_rcrb_get_comp_regs(pdev, map, dport); - if (rc) - return rc; - - rc = cxl_dport_map_rcd_linkcap(pdev, dport); - if (rc) - return rc; - - } else if (rc) { - return rc; - } - - return cxl_setup_regs(map); -} - static int cxl_pci_ras_unmask(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); diff --git a/include/cxl/pci.h b/include/cxl/pci.h index 5729a93b252a..d31e1363e1fd 100644 --- a/include/cxl/pci.h +++ b/include/cxl/pci.h @@ -4,6 +4,8 @@ #ifndef __CXL_CXL_PCI_H__ #define __CXL_CXL_PCI_H__ +#include + /* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ #define CXL_DVSEC_PCIE_DEVICE 0 #define CXL_DVSEC_CAP_OFFSET 0xA diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 385301aeaeb3..629880c5b9ed 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -12,7 +12,6 @@ ldflags-y += --wrap=cxl_await_media_ready ldflags-y += --wrap=cxl_hdm_decode_init ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport -ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat ldflags-y += --wrap=cxl_dport_init_ras_reporting diff --git 
a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 1989ae020df3..c471400116a1 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -268,23 +268,6 @@ struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, "CXL"); -resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, - struct cxl_dport *dport) -{ - int index; - resource_size_t component_reg_phys; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(dev)) - component_reg_phys = CXL_RESOURCE_NONE; - else - component_reg_phys = cxl_rcd_component_reg_phys(dev, dport); - put_cxl_mock_ops(index); - - return component_reg_phys; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, "CXL"); - void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) { int index; -- 2.34.1 From: Alejandro Lucero Export cxl core functions for a Type2 driver being able to discover and map the device component registers. Use it in sfc driver cxl initialization. 
Signed-off-by: Alejandro Lucero Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron --- drivers/cxl/core/port.c | 1 + drivers/cxl/cxl.h | 7 ------- drivers/cxl/cxlpci.h | 12 ----------- drivers/net/ethernet/sfc/efx_cxl.c | 33 ++++++++++++++++++++++++++++++ include/cxl/cxl.h | 20 ++++++++++++++++++ include/cxl/pci.h | 15 ++++++++++++++ 6 files changed, 69 insertions(+), 19 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index bb326dc95d5f..240c3c5bcdc8 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index e197c36c7525..793d4dfe51a2 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -38,10 +38,6 @@ extern const struct nvdimm_security_ops *cxl_security_ops; #define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) #define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) -#define CXL_CM_CAP_CAP_ID_RAS 0x2 -#define CXL_CM_CAP_CAP_ID_HDM 0x5 -#define CXL_CM_CAP_CAP_HDM_VERSION 1 - /* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */ #define CXL_HDM_DECODER_CAP_OFFSET 0x0 #define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) @@ -205,9 +201,6 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); void cxl_probe_device_regs(struct device *dev, void __iomem *base, struct cxl_device_reg_map *map); -int cxl_map_component_regs(const struct cxl_register_map *map, - struct cxl_component_regs *regs, - unsigned long map_mask); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 4b11757a46ab..2247823acf6f 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -13,16 +13,6 @@ */ #define CXL_PCI_DEFAULT_MAX_VECTORS 16 -/* Register Block Identifier (RBI) */ 
-enum cxl_regloc_type { - CXL_REGLOC_RBI_EMPTY = 0, - CXL_REGLOC_RBI_COMPONENT, - CXL_REGLOC_RBI_VIRT, - CXL_REGLOC_RBI_MEMDEV, - CXL_REGLOC_RBI_PMU, - CXL_REGLOC_RBI_TYPES -}; - /* * Table Access DOE, CDAT Read Entry Response * @@ -90,6 +80,4 @@ struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info); void read_cdat_data(struct cxl_port *port); -int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 56d148318636..cdfbe546d8d8 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -5,6 +5,7 @@ * Copyright (C) 2025, Advanced Micro Devices, Inc. */ +#include #include #include @@ -19,6 +20,7 @@ int efx_cxl_init(struct efx_probe_data *probe_data) struct pci_dev *pci_dev = efx->pci_dev; struct efx_cxl *cxl; u16 dvsec; + int rc; probe_data->cxl_pio_initialised = false; @@ -45,6 +47,37 @@ int efx_cxl_init(struct efx_probe_data *probe_data) if (!cxl) return -ENOMEM; + rc = cxl_pci_setup_regs(pci_dev, CXL_REGLOC_RBI_COMPONENT, + &cxl->cxlds.reg_map); + if (rc) { + dev_err(&pci_dev->dev, "No component registers (err=%d)\n", rc); + return rc; + } + + if (!cxl->cxlds.reg_map.component_map.hdm_decoder.valid) { + dev_err(&pci_dev->dev, "Expected HDM component register not found\n"); + return -ENODEV; + } + + if (!cxl->cxlds.reg_map.component_map.ras.valid) + return dev_err_probe(&pci_dev->dev, -ENODEV, + "Expected RAS component register not found\n"); + + rc = cxl_map_component_regs(&cxl->cxlds.reg_map, + &cxl->cxlds.regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS)); + if (rc) { + dev_err(&pci_dev->dev, "Failed to map RAS capability.\n"); + return rc; + } + + /* + * Set media ready explicitly as there are neither mailbox for checking + * this state nor the CXL register involved, both not mandatory for 
+ * type2. + */ + cxl->cxlds.media_ready = true; + probe_data->cxl = cxl; return 0; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 13d448686189..3b9c8cb187a3 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -70,6 +70,10 @@ struct cxl_regs { ); }; +#define CXL_CM_CAP_CAP_ID_RAS 0x2 +#define CXL_CM_CAP_CAP_ID_HDM 0x5 +#define CXL_CM_CAP_CAP_HDM_VERSION 1 + struct cxl_reg_map { bool valid; int id; @@ -223,4 +227,20 @@ struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ sizeof(drv_struct), mbox); \ }) + +/** + * cxl_map_component_regs - map cxl component registers + * + * + * @map: cxl register map to update with the mappings + * @regs: cxl component registers to work with + * @map_mask: cxl component regs to map + * + * Returns integer: success (0) or error (-ENOMEM) + * + * Made public for Type2 driver support. + */ +int cxl_map_component_regs(const struct cxl_register_map *map, + struct cxl_component_regs *regs, + unsigned long map_mask); #endif /* __CXL_CXL_H__ */ diff --git a/include/cxl/pci.h b/include/cxl/pci.h index d31e1363e1fd..bd12e29bcdc9 100644 --- a/include/cxl/pci.h +++ b/include/cxl/pci.h @@ -23,3 +23,18 @@ #define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) #endif + +/* Register Block Identifier (RBI) */ +enum cxl_regloc_type { + CXL_REGLOC_RBI_EMPTY = 0, + CXL_REGLOC_RBI_COMPONENT, + CXL_REGLOC_RBI_VIRT, + CXL_REGLOC_RBI_MEMDEV, + CXL_REGLOC_RBI_PMU, + CXL_REGLOC_RBI_TYPES +}; + +struct cxl_register_map; + +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); -- 2.34.1 From: Alejandro Lucero Type3 relies on mailbox CXL_MBOX_OP_IDENTIFY command for initializing memdev state params which end up being used for DPA initialization. Allow a Type2 driver to initialize DPA simply by giving the size of its volatile hardware partition. Move related functions to memdev. Add sfc driver as the client. 
Signed-off-by: Alejandro Lucero Reviewed-by: Dan Williams --- drivers/cxl/core/core.h | 2 + drivers/cxl/core/mbox.c | 51 +---------------------- drivers/cxl/core/memdev.c | 66 ++++++++++++++++++++++++++++++ drivers/net/ethernet/sfc/efx_cxl.c | 4 ++ include/cxl/cxl.h | 1 + 5 files changed, 74 insertions(+), 50 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index d96213c02fd6..c4dddbec5d6e 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -90,6 +90,8 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, struct dentry *cxl_debugfs_create_dir(const char *dir); int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, enum cxl_partition_mode mode); +struct cxl_memdev_state; +int cxl_mem_get_partition_info(struct cxl_memdev_state *mds); int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size); int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index bee84d0101d1..d57a0c2d39fb 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1144,7 +1144,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, "CXL"); * * See CXL @8.2.9.5.2.1 Get Partition Info */ -static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) +int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_partition_info pi; @@ -1300,55 +1300,6 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) return -EBUSY; } -static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) -{ - int i = info->nr_partitions; - - if (size == 0) - return; - - info->part[i].range = (struct range) { - .start = start, - .end = start + size - 1, - }; - info->part[i].mode = mode; - info->nr_partitions++; -} - -int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info 
*info) -{ - struct cxl_dev_state *cxlds = &mds->cxlds; - struct device *dev = cxlds->dev; - int rc; - - if (!cxlds->media_ready) { - info->size = 0; - return 0; - } - - info->size = mds->total_bytes; - - if (mds->partition_align_bytes == 0) { - add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); - add_part(info, mds->volatile_only_bytes, - mds->persistent_only_bytes, CXL_PARTMODE_PMEM); - return 0; - } - - rc = cxl_mem_get_partition_info(mds); - if (rc) { - dev_err(dev, "Failed to query partition information\n"); - return rc; - } - - add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); - add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, - CXL_PARTMODE_PMEM); - - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); - int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 97127d6067c4..d148a0c942aa 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -556,6 +556,72 @@ bool is_cxl_memdev(const struct device *dev) } EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); +static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) +{ + int i = info->nr_partitions; + + if (size == 0) + return; + + info->part[i].range = (struct range) { + .start = start, + .end = start + size - 1, + }; + info->part[i].mode = mode; + info->nr_partitions++; +} + +int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) +{ + struct cxl_dev_state *cxlds = &mds->cxlds; + struct device *dev = cxlds->dev; + int rc; + + if (!cxlds->media_ready) { + info->size = 0; + return 0; + } + + info->size = mds->total_bytes; + + if (mds->partition_align_bytes == 0) { + add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->volatile_only_bytes, + mds->persistent_only_bytes, CXL_PARTMODE_PMEM); + return 0; + } + + rc = 
cxl_mem_get_partition_info(mds); + if (rc) { + dev_err(dev, "Failed to query partition information\n"); + return rc; + } + + add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, + CXL_PARTMODE_PMEM); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); + +/** + * cxl_set_capacity: initialize dpa by a driver without a mailbox. + * + * @cxlds: pointer to cxl_dev_state + * @capacity: device volatile memory size + */ +int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity) +{ + struct cxl_dpa_info range_info = { + .size = capacity, + }; + + add_part(&range_info, 0, capacity, CXL_PARTMODE_RAM); + return cxl_dpa_setup(cxlds, &range_info); +} +EXPORT_SYMBOL_NS_GPL(cxl_set_capacity, "CXL"); + /** * set_exclusive_cxl_commands() - atomically disable user cxl commands * @mds: The device state to operate on diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index cdfbe546d8d8..651d26aa68dc 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -78,6 +78,10 @@ int efx_cxl_init(struct efx_probe_data *probe_data) */ cxl->cxlds.media_ready = true; + if (cxl_set_capacity(&cxl->cxlds, EFX_CTPIO_BUFFER_SIZE)) + return dev_err_probe(&pci_dev->dev, -ENODEV, + "dpa capacity setup failed\n"); + probe_data->cxl = cxl; return 0; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 3b9c8cb187a3..88dea6ac3769 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -243,4 +243,5 @@ struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, int cxl_map_component_regs(const struct cxl_register_map *map, struct cxl_component_regs *regs, unsigned long map_mask); +int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); #endif /* __CXL_CXL_H__ */ -- 2.34.1 From: Alejandro Lucero Current cxl core is relying on a CXL_DEVTYPE_CLASSMEM type device when creating a memdev leading to problems when 
obtaining cxl_memdev_state references from a CXL_DEVTYPE_DEVMEM type. Modify check for obtaining cxl_memdev_state adding CXL_DEVTYPE_DEVMEM support. Make devm_cxl_add_memdev accessible from an accel driver. Signed-off-by: Alejandro Lucero Reviewed-by: Ben Cheatham Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Dan Williams --- drivers/cxl/core/memdev.c | 15 +++++++++++-- drivers/cxl/cxlmem.h | 8 ------- drivers/cxl/mem.c | 46 +++++++++++++++++++++++++++++---------- include/cxl/cxl.h | 7 ++++++ 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index d148a0c942aa..3228287bf3f0 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "private.h" #include "trace.h" @@ -550,9 +551,16 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; +static const struct device_type cxl_accel_memdev_type = { + .name = "cxl_accel_memdev", + .release = cxl_memdev_release, + .devnode = cxl_memdev_devnode, +}; + bool is_cxl_memdev(const struct device *dev) { - return dev->type == &cxl_memdev_type; + return (dev->type == &cxl_memdev_type || + dev->type == &cxl_accel_memdev_type); } EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); @@ -1139,7 +1147,10 @@ struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev->parent = cxlds->dev; dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); - dev->type = &cxl_memdev_type; + if (cxlds->type == CXL_DEVTYPE_DEVMEM) + dev->type = &cxl_accel_memdev_type; + else + dev->type = &cxl_memdev_type; device_set_pm_not_required(dev); INIT_WORK(&cxlmd->detach_work, detach_memdev); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 86aa4899d511..0e02cd9f0bad 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -34,10 +34,6 @@ (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) != \
CXLMDEV_RESET_NEEDED_NOT) -struct cxl_memdev_ops { - int (*probe)(struct cxl_memdev *cxlmd); -}; - /** * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device * @dev: driver core device object @@ -101,10 +97,6 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) return is_cxl_memdev(port->uport_dev); } -struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds, - const struct cxl_memdev_ops *ops); - int devm_cxl_sanitize_setup_notifier(struct device *host, struct cxl_memdev *cxlmd); struct cxl_memdev_state; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 7480dfdbb57d..9ffee09fcb50 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -67,6 +67,26 @@ static int cxl_debugfs_poison_clear(void *data, u64 dpa) DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, cxl_debugfs_poison_clear, "%llx\n"); +static void cxl_memdev_poison_enable(struct cxl_memdev_state *mds, + struct cxl_memdev *cxlmd, + struct dentry *dentry) +{ + /* + * Avoid poison debugfs for DEVMEM aka accelerators as they rely on + * cxl_memdev_state. 
+ */ + if (!mds) + return; + + if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) + debugfs_create_file("inject_poison", 0200, dentry, cxlmd, + &cxl_poison_inject_fops); + + if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) + debugfs_create_file("clear_poison", 0200, dentry, cxlmd, + &cxl_poison_clear_fops); +} + static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); @@ -94,12 +114,8 @@ static int cxl_mem_probe(struct device *dev) dentry = cxl_debugfs_create_dir(dev_name(dev)); debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show); - if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) - debugfs_create_file("inject_poison", 0200, dentry, cxlmd, - &cxl_poison_inject_fops); - if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) - debugfs_create_file("clear_poison", 0200, dentry, cxlmd, - &cxl_poison_clear_fops); + /* for CLASSMEM memory expanders enable poison injection */ + cxl_memdev_poison_enable(mds, cxlmd, dentry); rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); if (rc) @@ -236,16 +252,24 @@ static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); -static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +static bool cxl_poison_attr_visible(struct kobject *kobj, struct attribute *a) { struct device *dev = kobj_to_dev(kobj); struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - if (a == &dev_attr_trigger_poison_list.attr) - if (!test_bit(CXL_POISON_ENABLED_LIST, - mds->poison.enabled_cmds)) - return 0; + if (!mds || + !test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) + return false; + + return true; +} + +static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +{ + if (a == &dev_attr_trigger_poison_list.attr && + !cxl_poison_attr_visible(kobj, a)) + return 0; return 
a->mode; } diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 88dea6ac3769..401a59185608 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -153,6 +153,10 @@ struct cxl_dpa_partition { #define CXL_NR_PARTITIONS_MAX 2 +struct cxl_memdev_ops { + int (*probe)(struct cxl_memdev *cxlmd); +}; + /** * struct cxl_dev_state - The driver device state * @@ -244,4 +248,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map, struct cxl_component_regs *regs, unsigned long map_mask); int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops); #endif /* __CXL_CXL_H__ */ -- 2.34.1 From: Alejandro Lucero Use cxl API for creating a cxl memory device using the type2 cxl_dev_state struct. Signed-off-by: Alejandro Lucero Reviewed-by: Martin Habets Reviewed-by: Fan Ni Acked-by: Edward Cree Reviewed-by: Jonathan Cameron --- drivers/net/ethernet/sfc/efx_cxl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 651d26aa68dc..177c60b269d6 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -82,6 +82,12 @@ int efx_cxl_init(struct efx_probe_data *probe_data) return dev_err_probe(&pci_dev->dev, -ENODEV, "dpa capacity setup failed\n"); + cxl->cxlmd = devm_cxl_add_memdev(&pci_dev->dev, &cxl->cxlds, NULL); + if (IS_ERR(cxl->cxlmd)) { + pci_err(pci_dev, "CXL accel memdev creation failed\n"); + return PTR_ERR(cxl->cxlmd); + } + probe_data->cxl = cxl; return 0; -- 2.34.1 From: Alejandro Lucero The first step for a CXL accelerator driver that wants to establish new CXL.mem regions is to register a 'struct cxl_memdev'. That kicks off cxl_mem_probe() to enumerate all 'struct cxl_port' instances in the topology up to the root.
If the port driver has not attached yet the expectation is that the driver waits until that link is established. The common cxl_pci driver has reason to keep the 'struct cxl_memdev' device attached to the bus until the root driver attaches. An accelerator may want to instead defer probing until CXL resources can be acquired. Use the @endpoint attribute of a 'struct cxl_memdev' to convey when an accelerator driver's probing should be deferred vs failed. Provide that indication via a new cxl_acquire_endpoint() API that can retrieve the probe status of the memdev. Signed-off-by: Alejandro Lucero --- drivers/cxl/core/memdev.c | 42 +++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/port.c | 2 +- drivers/cxl/mem.c | 7 +++++-- include/cxl/cxl.h | 2 ++ 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 3228287bf3f0..10d21996598a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -1164,6 +1164,48 @@ struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, } EXPORT_SYMBOL_NS_GPL(cxl_memdev_alloc, "CXL"); +/* + * Try to get a locked reference on a memdev's CXL port topology + * connection. Be careful to observe when cxl_mem_probe() has deposited + * a probe deferral awaiting the arrival of the CXL root driver.
+ */ +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd) +{ + struct cxl_port *endpoint; + int rc = -ENXIO; + + device_lock(&cxlmd->dev); + + endpoint = cxlmd->endpoint; + if (!endpoint) + goto err; + + if (IS_ERR(endpoint)) { + rc = PTR_ERR(endpoint); + goto err; + } + + device_lock(&endpoint->dev); + if (!endpoint->dev.driver) + goto err_endpoint; + + return endpoint; + +err_endpoint: + device_unlock(&endpoint->dev); +err: + device_unlock(&cxlmd->dev); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(cxl_acquire_endpoint, "CXL"); + +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) +{ + device_unlock(&endpoint->dev); + device_unlock(&cxlmd->dev); +} +EXPORT_SYMBOL_NS_GPL(cxl_release_endpoint, "CXL"); + static void sanitize_teardown_notifier(void *data) { struct cxl_memdev_state *mds = data; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 240c3c5bcdc8..4c3fecd4c8ea 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1557,7 +1557,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, */ dev_dbg(&cxlmd->dev, "%s is a root dport\n", dev_name(dport_dev)); - return -ENXIO; + return -EPROBE_DEFER; } struct cxl_port *parent_port __free(put_cxl_port) = diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 9ffee09fcb50..f103e2003add 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -122,14 +122,17 @@ static int cxl_mem_probe(struct device *dev) return rc; rc = devm_cxl_enumerate_ports(cxlmd); - if (rc) + if (rc) { + cxlmd->endpoint = ERR_PTR(rc); return rc; + } struct cxl_port *parent_port __free(put_cxl_port) = cxl_mem_find_port(cxlmd, &dport); if (!parent_port) { dev_err(dev, "CXL port topology not found\n"); - return -ENXIO; + cxlmd->endpoint = ERR_PTR(-EPROBE_DEFER); + return -EPROBE_DEFER; } if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) { diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 401a59185608..64946e698f5f 100644 --- a/include/cxl/cxl.h +++ 
b/include/cxl/cxl.h @@ -251,4 +251,6 @@ int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); struct cxl_memdev *devm_cxl_add_memdev(struct device *host, struct cxl_dev_state *cxlds, const struct cxl_memdev_ops *ops); +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd); +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint); #endif /* __CXL_CXL_H__ */ -- 2.34.1 From: Alejandro Lucero CXL region creation involves allocating capacity from Device Physical Address (DPA) and assigning it to decode a given Host Physical Address (HPA). Before determining how much DPA to allocate the amount of available HPA must be determined. Also, not all HPA is created equal, some HPA targets RAM, some targets PMEM, some is prepared for device-memory flows like HDM-D and HDM-DB, and some is HDM-H (host-only). In order to support Type2 CXL devices, wrap all of those concerns into an API that retrieves a root decoder (platform CXL window) that fits the specified constraints and the capacity available for a new region. Add a complementary function for releasing the reference to such root decoder. 
Based on https://lore.kernel.org/linux-cxl/168592159290.1948938.13522227102445462976.stgit@dwillia2-xfh.jf.intel.com/ Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron --- drivers/cxl/core/region.c | 166 ++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 3 + include/cxl/cxl.h | 6 ++ 3 files changed, 175 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e9bf42d91689..78f13873397a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -703,6 +703,172 @@ static int free_hpa(struct cxl_region *cxlr) return 0; } +struct cxlrd_max_context { + struct device * const *host_bridges; + int interleave_ways; + unsigned long flags; + resource_size_t max_hpa; + struct cxl_root_decoder *cxlrd; +}; + +static int find_max_hpa(struct device *dev, void *data) +{ + struct cxlrd_max_context *ctx = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_root_decoder *cxlrd; + struct resource *res, *prev; + struct cxl_decoder *cxld; + resource_size_t max; + int found = 0; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + cxlsd = &cxlrd->cxlsd; + cxld = &cxlsd->cxld; + + if ((cxld->flags & ctx->flags) != ctx->flags) { + dev_dbg(dev, "flags not matching: %08lx vs %08lx\n", + cxld->flags, ctx->flags); + return 0; + } + + for (int i = 0; i < ctx->interleave_ways; i++) { + for (int j = 0; j < ctx->interleave_ways; j++) { + if (ctx->host_bridges[i] == cxlsd->target[j]->dport_dev) { + found++; + break; + } + } + } + + if (found != ctx->interleave_ways) { + dev_dbg(dev, + "Not enough host bridges. Found %d for %d interleave ways requested\n", + found, ctx->interleave_ways); + return 0; + } + + /* + * Walk the root decoder resource range relying on cxl_rwsem.region to + * preclude sibling arrival/departure and find the largest free space + * gap.
+ */ + lockdep_assert_held_read(&cxl_rwsem.region); + res = cxlrd->res->child; + + /* With no resource child the whole parent resource is available */ + if (!res) + max = resource_size(cxlrd->res); + else + max = 0; + + for (prev = NULL; res; prev = res, res = res->sibling) { + struct resource *next = res->sibling; + resource_size_t free = 0; + + /* + * Sanity check for preventing arithmetic problems below as a + * resource with size 0 could imply using the end field below + * when set to unsigned zero - 1 or all f in hex. + */ + if (prev && !resource_size(prev)) + continue; + + if (!prev && res->start > cxlrd->res->start) { + free = res->start - cxlrd->res->start; + max = max(free, max); + } + if (prev && res->start > prev->end + 1) { + free = res->start - prev->end - 1; + max = max(free, max); + } + if (next && res->end + 1 < next->start) { + free = next->start - res->end - 1; + max = max(free, max); + } + if (!next && res->end + 1 < cxlrd->res->end + 1) { + free = cxlrd->res->end - res->end; + max = max(free, max); + } + } + + dev_dbg(cxlrd_dev(cxlrd), "found %pa bytes of free space\n", &max); + if (max > ctx->max_hpa) { + if (ctx->cxlrd) + put_device(cxlrd_dev(ctx->cxlrd)); + get_device(cxlrd_dev(cxlrd)); + ctx->cxlrd = cxlrd; + ctx->max_hpa = max; + } + return 0; +} + +/** + * cxl_get_hpa_freespace - find a root decoder with free capacity per constraints + * @cxlmd: the memdev whose endpoint requires the HPA + * @interleave_ways: number of entries in @host_bridges + * @flags: CXL_DECODER_F flags for selecting RAM vs PMEM, and Type2 device + * @max_avail_contig: output parameter of max contiguous bytes available in the + * returned decoder + * + * Returns a pointer to a struct cxl_root_decoder + * + * The return tuple of a 'struct cxl_root_decoder' and 'bytes available given + * in (@max_avail_contig))' is a point in time snapshot.
If by the time the + * caller goes to use this root decoder's capacity the capacity is reduced then + * caller needs to loop and retry. + * + * The returned root decoder has an elevated reference count that needs to be + * put with cxl_put_root_decoder(cxlrd). + */ +struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, + int interleave_ways, + unsigned long flags, + resource_size_t *max_avail_contig) +{ + struct cxl_root *root __free(put_cxl_root) = NULL; + struct cxl_port *endpoint = cxlmd->endpoint; + struct cxlrd_max_context ctx = { + .host_bridges = &endpoint->host_bridge, + .flags = flags, + }; + struct cxl_port *root_port; + + if (!endpoint) { + dev_dbg(&cxlmd->dev, "endpoint not linked to memdev\n"); + return ERR_PTR(-ENXIO); + } + + root = find_cxl_root(endpoint); + if (!root) { + dev_dbg(&endpoint->dev, "endpoint can not be related to a root port\n"); + return ERR_PTR(-ENXIO); + } + + root_port = &root->port; + scoped_guard(rwsem_read, &cxl_rwsem.region) + device_for_each_child(&root_port->dev, &ctx, find_max_hpa); + + if (!ctx.cxlrd) + return ERR_PTR(-ENOMEM); + + *max_avail_contig = ctx.max_hpa; + return ctx.cxlrd; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_hpa_freespace, "CXL"); + +/* + * TODO: those references released here should avoid the decoder to be + * unregistered. 
+ */ +void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd) +{ + put_device(cxlrd_dev(cxlrd)); +} +EXPORT_SYMBOL_NS_GPL(cxl_put_root_decoder, "CXL"); + static ssize_t size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 793d4dfe51a2..076640e91ee0 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -664,6 +664,9 @@ struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev); bool is_root_decoder(struct device *dev); + +#define cxlrd_dev(cxlrd) (&(cxlrd)->cxlsd.cxld.dev) + bool is_switch_decoder(struct device *dev); bool is_endpoint_decoder(struct device *dev); struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 64946e698f5f..7722d4190573 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -253,4 +253,10 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host, const struct cxl_memdev_ops *ops); struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd); void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint); +struct cxl_port; +struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, + int interleave_ways, + unsigned long flags, + resource_size_t *max); +void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd); #endif /* __CXL_CXL_H__ */ -- 2.34.1 From: Alejandro Lucero Use cxl api for getting HPA (Host Physical Address) to use from a CXL root decoder. 
Signed-off-by: Alejandro Lucero Reviewed-by: Martin Habets Acked-by: Edward Cree Reviewed-by: Jonathan Cameron --- drivers/cxl/cxl.h | 15 --------------- drivers/net/ethernet/sfc/Kconfig | 1 + drivers/net/ethernet/sfc/efx_cxl.c | 27 +++++++++++++++++++++++++++ include/cxl/cxl.h | 14 ++++++++++++++ 4 files changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 076640e91ee0..ab490b5a9457 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -219,21 +219,6 @@ int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 -/* - * cxl_decoder flags that define the type of memory / devices this - * decoder supports as well as configuration lock status See "CXL 2.0 - * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. - * Additionally indicate whether decoder settings were autodetected, - * user customized. - */ -#define CXL_DECODER_F_RAM BIT(0) -#define CXL_DECODER_F_PMEM BIT(1) -#define CXL_DECODER_F_TYPE2 BIT(2) -#define CXL_DECODER_F_TYPE3 BIT(3) -#define CXL_DECODER_F_LOCK BIT(4) -#define CXL_DECODER_F_ENABLE BIT(5) -#define CXL_DECODER_F_MASK GENMASK(5, 0) - enum cxl_decoder_type { CXL_DECODER_DEVMEM = 2, CXL_DECODER_HOSTONLYMEM = 3, diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 979f2801e2a8..e959d9b4f4ce 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -69,6 +69,7 @@ config SFC_MCDI_LOGGING config SFC_CXL bool "Solarflare SFC9100-family CXL support" depends on SFC && CXL_BUS >= SFC + depends on CXL_REGION default SFC help This enables SFC CXL support if the kernel is configuring CXL for diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 177c60b269d6..d29594e71027 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -18,6 +18,7 @@ int efx_cxl_init(struct 
efx_probe_data *probe_data) { struct efx_nic *efx = &probe_data->efx; struct pci_dev *pci_dev = efx->pci_dev; + resource_size_t max_size; struct efx_cxl *cxl; u16 dvsec; int rc; @@ -88,13 +89,39 @@ int efx_cxl_init(struct efx_probe_data *probe_data) return PTR_ERR(cxl->cxlmd); } + cxl->endpoint = cxl_acquire_endpoint(cxl->cxlmd); + if (IS_ERR(cxl->endpoint)) + return PTR_ERR(cxl->endpoint); + + cxl->cxlrd = cxl_get_hpa_freespace(cxl->cxlmd, 1, + CXL_DECODER_F_RAM | CXL_DECODER_F_TYPE2, + &max_size); + + if (IS_ERR(cxl->cxlrd)) { + pci_err(pci_dev, "cxl_get_hpa_freespace failed\n"); + cxl_release_endpoint(cxl->cxlmd, cxl->endpoint); + return PTR_ERR(cxl->cxlrd); + } + + if (max_size < EFX_CTPIO_BUFFER_SIZE) { + pci_err(pci_dev, "%s: not enough free HPA space %pap < %u\n", + __func__, &max_size, EFX_CTPIO_BUFFER_SIZE); + cxl_put_root_decoder(cxl->cxlrd); + cxl_release_endpoint(cxl->cxlmd, cxl->endpoint); + return -ENOSPC; + } + probe_data->cxl = cxl; + cxl_release_endpoint(cxl->cxlmd, cxl->endpoint); + return 0; } void efx_cxl_exit(struct efx_probe_data *probe_data) { + if (probe_data->cxl) + cxl_put_root_decoder(probe_data->cxl->cxlrd); } MODULE_IMPORT_NS("CXL"); diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 7722d4190573..788700fb1eb2 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -153,6 +153,20 @@ struct cxl_dpa_partition { #define CXL_NR_PARTITIONS_MAX 2 +/* + * cxl_decoder flags that define the type of memory / devices this + * decoder supports as well as configuration lock status See "CXL 2.0 + * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. + * Additionally indicate whether decoder settings were autodetected, + * user customized. 
+ */ +#define CXL_DECODER_F_RAM BIT(0) +#define CXL_DECODER_F_PMEM BIT(1) +#define CXL_DECODER_F_TYPE2 BIT(2) +#define CXL_DECODER_F_TYPE3 BIT(3) +#define CXL_DECODER_F_LOCK BIT(4) +#define CXL_DECODER_F_ENABLE BIT(5) + struct cxl_memdev_ops { int (*probe)(struct cxl_memdev *cxlmd); }; -- 2.34.1 From: Alejandro Lucero Region creation involves finding available DPA (device-physical-address) capacity to map into HPA (host-physical-address) space. In order to support CXL Type2 devices, define an API, cxl_request_dpa(), that tries to allocate the DPA memory the driver requires to operate. The memory requested should not be bigger than the max available HPA obtained previously with cxl_get_hpa_freespace(). Based on https://lore.kernel.org/linux-cxl/168592158743.1948938.7622563891193802610.stgit@dwillia2-xfh.jf.intel.com/ Signed-off-by: Alejandro Lucero --- drivers/cxl/core/hdm.c | 83 ++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 1 + include/cxl/cxl.h | 5 +++ 3 files changed, 89 insertions(+) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e9e1d555cec6..d1b1d8ab348a 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "cxlmem.h" #include "core.h" @@ -556,6 +557,13 @@ bool cxl_resource_contains_addr(const struct resource *res, const resource_size_ return resource_contains(res, &_addr); } +/** + * cxl_dpa_free - release DPA (Device Physical Address) + * + * @cxled: endpoint decoder linked to the DPA + * + * Returns 0 or error.
+ */ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); @@ -582,6 +590,7 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) devm_cxl_dpa_release(cxled); return 0; } +EXPORT_SYMBOL_NS_GPL(cxl_dpa_free, "CXL"); int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, enum cxl_partition_mode mode) @@ -613,6 +622,80 @@ int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, return 0; } +static int find_free_decoder(struct device *dev, const void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_port *port; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + port = cxled_to_port(cxled); + + return cxled->cxld.id == (port->hdm_end + 1); +} + +static struct cxl_endpoint_decoder * +cxl_find_free_decoder(struct cxl_memdev *cxlmd) +{ + struct cxl_port *endpoint = cxlmd->endpoint; + struct device *dev; + + guard(rwsem_read)(&cxl_rwsem.dpa); + dev = device_find_child(&endpoint->dev, NULL, + find_free_decoder); + if (dev) + return to_cxl_endpoint_decoder(dev); + + return NULL; +} + +/** + * cxl_request_dpa - search and reserve DPA given input constraints + * @cxlmd: memdev with an endpoint port with available decoders + * @mode: DPA operation mode (ram vs pmem) + * @alloc: dpa size required + * + * Returns a pointer to a cxl_endpoint_decoder struct or an error + * + * Given that a region needs to allocate from limited HPA capacity it + * may be the case that a device has more mappable DPA capacity than + * available HPA. The expectation is that @alloc is a driver known + * value based on the device capacity but it could not be available + * due to HPA constraints. + * + * Returns a pinned cxl_decoder with at least @alloc bytes of capacity + * reserved, or an error pointer. The caller is also expected to own the + * lifetime of the memdev registration associated with the endpoint to + * pin the decoder registered as well. 
+ */ +struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd, + enum cxl_partition_mode mode, + resource_size_t alloc) +{ + struct cxl_endpoint_decoder *cxled __free(put_cxled) = + cxl_find_free_decoder(cxlmd); + int rc; + + if (!IS_ALIGNED(alloc, SZ_256M)) + return ERR_PTR(-EINVAL); + + if (!cxled) + return ERR_PTR(-ENODEV); + + rc = cxl_dpa_set_part(cxled, mode); + if (rc) + return ERR_PTR(rc); + + rc = cxl_dpa_alloc(cxled, alloc); + if (rc) + return ERR_PTR(rc); + + return no_free_ptr(cxled); +} +EXPORT_SYMBOL_NS_GPL(cxl_request_dpa, "CXL"); + static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index ab490b5a9457..0020d8e474a6 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -625,6 +625,7 @@ struct cxl_root *find_cxl_root(struct cxl_port *port); DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev)) DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) +DEFINE_FREE(put_cxled, struct cxl_endpoint_decoder *, if (_T) put_device(&_T->cxld.dev)) DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev)) DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 788700fb1eb2..0a607710340d 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -7,6 +7,7 @@ #include #include +#include #include /** @@ -273,4 +274,8 @@ struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, unsigned long flags, resource_size_t *max); void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd); +struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd, + enum cxl_partition_mode mode, + resource_size_t alloc); +int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); #endif /* __CXL_CXL_H__ */ -- 2.34.1 
From: Alejandro Lucero

Use cxl api for getting DPA (Device Physical Address) to use through an
endpoint decoder.

Signed-off-by: Alejandro Lucero
Reviewed-by: Martin Habets
Acked-by: Edward Cree
Reviewed-by: Jonathan Cameron
---
 drivers/net/ethernet/sfc/efx_cxl.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index d29594e71027..4461b7a4dc2c 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -99,16 +99,23 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 
 	if (IS_ERR(cxl->cxlrd)) {
 		pci_err(pci_dev, "cxl_get_hpa_freespace failed\n");
-		cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
-		return PTR_ERR(cxl->cxlrd);
+		rc = PTR_ERR(cxl->cxlrd);
+		goto err_release;
 	}
 
 	if (max_size < EFX_CTPIO_BUFFER_SIZE) {
 		pci_err(pci_dev, "%s: not enough free HPA space %pap < %u\n",
 			__func__, &max_size, EFX_CTPIO_BUFFER_SIZE);
-		cxl_put_root_decoder(cxl->cxlrd);
-		cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
-		return -ENOSPC;
+		rc = -ENOSPC;
+		goto err_decoder;
+	}
+
+	cxl->cxled = cxl_request_dpa(cxl->cxlmd, CXL_PARTMODE_RAM,
+				     EFX_CTPIO_BUFFER_SIZE);
+	if (IS_ERR(cxl->cxled)) {
+		pci_err(pci_dev, "CXL accel request DPA failed\n");
+		rc = PTR_ERR(cxl->cxled);
+		goto err_decoder;
 	}
 
 	probe_data->cxl = cxl;
@@ -116,12 +123,21 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
 
 	return 0;
+
+err_decoder:
+	cxl_put_root_decoder(cxl->cxlrd);
+err_release:
+	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
+
+	return rc;
 }
 
 void efx_cxl_exit(struct efx_probe_data *probe_data)
 {
-	if (probe_data->cxl)
+	if (probe_data->cxl) {
+		cxl_dpa_free(probe_data->cxl->cxled);
 		cxl_put_root_decoder(probe_data->cxl->cxlrd);
+	}
 }
 
 MODULE_IMPORT_NS("CXL");
-- 
2.34.1

From: Alejandro Lucero

Current code is expecting Type3 or CXL_DECODER_HOSTONLYMEM devices only.
Support for Type2 implies region type needs to be based on the endpoint type HDM-D[B] instead. Signed-off-by: Alejandro Lucero Reviewed-by: Zhi Wang Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham Reviewed-by: Alison Schofield Reviewed-by: Davidlohr Bueso --- drivers/cxl/core/region.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 78f13873397a..3c65ffd17a98 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2753,7 +2753,8 @@ static ssize_t create_ram_region_show(struct device *dev, } static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, - enum cxl_partition_mode mode, int id) + enum cxl_partition_mode mode, int id, + enum cxl_decoder_type target_type) { int rc; @@ -2775,7 +2776,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, return ERR_PTR(-EBUSY); } - return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM); + return devm_cxl_add_region(cxlrd, id, mode, target_type); } static ssize_t create_region_store(struct device *dev, const char *buf, @@ -2789,7 +2790,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf, if (rc != 1) return -EINVAL; - cxlr = __create_region(cxlrd, mode, id); + cxlr = __create_region(cxlrd, mode, id, CXL_DECODER_HOSTONLYMEM); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); @@ -3581,7 +3582,8 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, do { cxlr = __create_region(cxlrd, cxlds->part[part].mode, - atomic_read(&cxlrd->region_id)); + atomic_read(&cxlrd->region_id), + cxled->cxld.target_type); } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); if (IS_ERR(cxlr)) { -- 2.34.1 From: Alejandro Lucero Region creation based on Type3 devices is triggered from user space allowing memory combination through interleaving. 
In preparation for kernel driven region creation, that is Type2 drivers triggering region creation backed with its advertised CXL memory, factor out a common helper from the user-sysfs region setup for interleave ways. Signed-off-by: Alejandro Lucero Reviewed-by: Zhi Wang Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham Reviewed-by: Alison Schofield --- drivers/cxl/core/region.c | 43 ++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 3c65ffd17a98..6ea74f53936a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -483,22 +483,14 @@ static ssize_t interleave_ways_show(struct device *dev, static const struct attribute_group *get_cxl_region_target_group(void); -static ssize_t interleave_ways_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_ways(struct cxl_region *cxlr, int val) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - unsigned int val, save; - int rc; + int save, rc; u8 iw; - rc = kstrtouint(buf, 0, &val); - if (rc) - return rc; - rc = ways_to_eiw(val, &iw); if (rc) return rc; @@ -513,9 +505,7 @@ static ssize_t interleave_ways_store(struct device *dev, return -EINVAL; } - ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; @@ -523,10 +513,31 @@ static ssize_t interleave_ways_store(struct device *dev, save = p->interleave_ways; p->interleave_ways = val; rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); - if 
(rc) { + if (rc) p->interleave_ways = save; + + return rc; +} + +static ssize_t interleave_ways_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + + rc = set_interleave_ways(cxlr, val); + if (rc) return rc; - } return len; } -- 2.34.1 From: Alejandro Lucero Region creation based on Type3 devices is triggered from user space allowing memory combination through interleaving. In preparation for kernel driven region creation, that is Type2 drivers triggering region creation backed with its advertised CXL memory, factor out a common helper from the user-sysfs region setup for interleave granularity. Signed-off-by: Alejandro Lucero Reviewed-by: Zhi Wang Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham Reviewed-by: Alison Schofield --- drivers/cxl/core/region.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 6ea74f53936a..7b05e41e8fad 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -557,21 +557,14 @@ static ssize_t interleave_granularity_show(struct device *dev, return sysfs_emit(buf, "%d\n", p->interleave_granularity); } -static ssize_t interleave_granularity_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_granularity(struct cxl_region *cxlr, int val) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; -
int rc, val; + int rc; u16 ig; - rc = kstrtoint(buf, 0, &val); - if (rc) - return rc; - rc = granularity_to_eig(val, &ig); if (rc) return rc; @@ -587,14 +580,32 @@ static ssize_t interleave_granularity_store(struct device *dev, if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity) return -EINVAL; - ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; - + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; p->interleave_granularity = val; + return 0; +} + +static ssize_t interleave_granularity_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + int rc, val; + + rc = kstrtoint(buf, 0, &val); + if (rc) + return rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + + rc = set_interleave_granularity(cxlr, val); + if (rc) + return rc; return len; } -- 2.34.1 From: Alejandro Lucero Creating a CXL region requires userspace intervention through the cxl sysfs files. Type2 support should allow accelerator drivers to create such cxl region from kernel code. Adding that functionality and integrating it with current support for memory expanders. Support an action by the type2 driver to be linked to the created region for unwinding the resources allocated properly. 
Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/

Signed-off-by: Alejandro Lucero
---
 drivers/cxl/core/region.c | 159 ++++++++++++++++++++++++++++++++++++--
 drivers/cxl/port.c        |   5 +-
 include/cxl/cxl.h         |   4 +
 3 files changed, 159 insertions(+), 9 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 7b05e41e8fad..20bd0c82806c 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2379,6 +2379,7 @@ int cxl_decoder_detach(struct cxl_region *cxlr,
 	}
 	return 0;
 }
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_detach, "CXL");
 
 static int __attach_target(struct cxl_region *cxlr,
 			   struct cxl_endpoint_decoder *cxled, int pos,
@@ -2864,6 +2865,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
 	return to_cxl_region(region_dev);
 }
 
+static void drop_region(struct cxl_region *cxlr)
+{
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+	struct cxl_port *port = cxlrd_to_port(cxlrd);
+
+	devm_release_action(port->uport_dev, unregister_region, cxlr);
+}
+
 static ssize_t delete_region_store(struct device *dev,
 				   struct device_attribute *attr,
 				   const char *buf, size_t len)
@@ -3592,14 +3601,12 @@ static int __construct_region(struct cxl_region *cxlr,
 	return 0;
 }
 
-/* Establish an empty region covering the given HPA range */
-static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
-					   struct cxl_endpoint_decoder *cxled)
+static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd,
+						 struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
-	struct cxl_port *port = cxlrd_to_port(cxlrd);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	int rc, part = READ_ONCE(cxled->part);
+	int part = READ_ONCE(cxled->part);
 	struct cxl_region *cxlr;
 
 	do {
@@ -3608,13 +3615,26 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 				       cxled->cxld.target_type);
 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
 
-	if (IS_ERR(cxlr)) {
+	if (IS_ERR(cxlr))
 		dev_err(cxlmd->dev.parent,
 			"%s:%s: %s failed assign region: %ld\n",
 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
 			__func__, PTR_ERR(cxlr));
-		return cxlr;
-	}
+
+	return cxlr;
+}
+
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+					   struct cxl_endpoint_decoder *cxled)
+{
+	struct cxl_port *port = cxlrd_to_port(cxlrd);
+	struct cxl_region *cxlr;
+	int rc;
+
+	cxlr = construct_region_begin(cxlrd, cxled);
+	if (IS_ERR(cxlr))
+		return cxlr;
 
 	rc = __construct_region(cxlr, cxlrd, cxled);
 	if (rc) {
@@ -3625,6 +3645,129 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 	return cxlr;
 }
 
+static struct cxl_region *
+__construct_new_region(struct cxl_root_decoder *cxlrd,
+		       struct cxl_endpoint_decoder **cxled, int ways)
+{
+	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled[0]);
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+	struct cxl_region_params *p;
+	resource_size_t size = 0;
+	struct cxl_region *cxlr;
+	int rc, i;
+
+	cxlr = construct_region_begin(cxlrd, cxled[0]);
+	if (IS_ERR(cxlr))
+		return cxlr;
+
+	guard(rwsem_write)(&cxl_rwsem.region);
+
+	/*
+	 * Sanity check. This should not happen with an accel driver handling
+	 * the region creation.
+	 */
+	p = &cxlr->params;
+	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
+		dev_err(cxlmd->dev.parent,
+			"%s:%s: %s unexpected region state\n",
+			dev_name(&cxlmd->dev), dev_name(&cxled[0]->cxld.dev),
+			__func__);
+		rc = -EBUSY;
+		goto err;
+	}
+
+	rc = set_interleave_ways(cxlr, ways);
+	if (rc)
+		goto err;
+
+	rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
+	if (rc)
+		goto err;
+
+	scoped_guard(rwsem_read, &cxl_rwsem.dpa) {
+		for (i = 0; i < ways; i++) {
+			if (!cxled[i]->dpa_res)
+				break;
+			size += resource_size(cxled[i]->dpa_res);
+		}
+	}
+
+	if (i < ways) {
+		/* Every endpoint decoder must already have DPA reserved */
+		rc = -ENXIO;
+		goto err;
+	}
+
+	rc = alloc_hpa(cxlr, size);
+	if (rc)
+		goto err;
+
+	scoped_guard(rwsem_read, &cxl_rwsem.dpa) {
+		for (i = 0; i < ways; i++) {
+			rc = cxl_region_attach(cxlr, cxled[i], 0);
+			if (rc)
+				goto err;
+		}
+	}
+
+	if (rc)
+		goto err;
+
+	rc = cxl_region_decode_commit(cxlr);
+	if (rc)
+		goto err;
+
+	p->state = CXL_CONFIG_COMMIT;
+
+	return cxlr;
+err:
+	drop_region(cxlr);
+	return ERR_PTR(rc);
+}
+
+/**
+ * cxl_create_region - Establish a region given an endpoint decoder
+ * @cxlrd: root decoder to allocate HPA
+ * @cxled: endpoint decoder with reserved DPA capacity
+ * @ways: interleave ways required
+ * @action: driver function to be called on region removal
+ * @data: pointer to data structure for the action execution
+ *
+ * Returns a fully formed region in the commit state and attached to the
+ * cxl_region driver.
+ */
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder **cxled,
+				     int ways, void (*action)(void *),
+				     void *data)
+{
+	struct cxl_region *cxlr;
+	int rc;
+
+	mutex_lock(&cxlrd->range_lock);
+	cxlr = __construct_new_region(cxlrd, cxled, ways);
+	mutex_unlock(&cxlrd->range_lock);
+	if (IS_ERR(cxlr))
+		return cxlr;
+
+	if (device_attach(&cxlr->dev) <= 0) {
+		dev_err(&cxlr->dev, "failed to create region\n");
+		drop_region(cxlr);
+		return ERR_PTR(-ENODEV);
+	}
+
+	if (action) {
+		rc = devm_add_action_or_reset(&cxlr->dev, action, data);
+		if (rc) {
+			drop_region(cxlr);
+			return ERR_PTR(rc);
+		}
+	}
+
+	return cxlr;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_create_region, "CXL");
+
 static struct cxl_region *
 cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa)
 {
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 83f5a09839ab..e6c0bd0fc9f9 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -35,6 +35,7 @@ static void schedule_detach(void *cxlmd)
 static int discover_region(struct device *dev, void *unused)
 {
 	struct cxl_endpoint_decoder *cxled;
+	struct cxl_memdev *cxlmd;
 	int rc;
 
 	if (!is_endpoint_decoder(dev))
@@ -44,7 +45,9 @@ static int discover_region(struct device *dev, void *unused)
 	if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
 		return 0;
 
-	if (cxled->state != CXL_DECODER_STATE_AUTO)
+	cxlmd = cxled_to_memdev(cxled);
+	if (cxled->state != CXL_DECODER_STATE_AUTO ||
+	    cxlmd->cxlds->type == CXL_DEVTYPE_DEVMEM)
 		return 0;
 
 	/*
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 0a607710340d..dbacefff8d60 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -278,4 +278,8 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
 					     enum cxl_partition_mode mode,
 					     resource_size_t alloc);
 int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder **cxled,
+				     int ways, void (*action)(void *),
+				     void *data);
 #endif /* __CXL_CXL_H__ */
-- 
2.34.1

From: Alejandro Lucero

By definition a type2 cxl device will use the host managed memory for
specific functionality, therefore it should not be available to other
uses.

Signed-off-by: Alejandro Lucero
Reviewed-by: Jonathan Cameron
Reviewed-by: Davidlohr Bueso
---
 drivers/cxl/core/region.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 20bd0c82806c..e39f272dd445 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3927,6 +3927,13 @@ static int cxl_region_probe(struct device *dev)
 	if (rc)
 		return rc;
 
+	/*
+	 * HDM-D[B] (device-memory) regions have accelerator specific usage.
+	 * Skip device-dax registration.
+	 */
+	if (cxlr->type == CXL_DECODER_DEVMEM)
+		return 0;
+
 	switch (cxlr->mode) {
 	case CXL_PARTMODE_PMEM:
 		rc = devm_cxl_region_edac_register(cxlr);
-- 
2.34.1

From: Alejandro Lucero

Use cxl api for creating a region using the endpoint decoder related to
a DPA range.

Add a callback for unwinding sfc cxl initialization when the endpoint
port is destroyed by potential cxl_acpi or cxl_mem modules removal.
Signed-off-by: Alejandro Lucero
---
 drivers/cxl/core/core.h            |  5 -----
 drivers/net/ethernet/sfc/efx_cxl.c | 22 ++++++++++++++++++++++
 include/cxl/cxl.h                  |  8 ++++++++
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index c4dddbec5d6e..83abaca9f418 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -14,11 +14,6 @@ extern const struct device_type cxl_pmu_type;
 
 extern struct attribute_group cxl_base_attribute_group;
 
-enum cxl_detach_mode {
-	DETACH_ONLY,
-	DETACH_INVALIDATE,
-};
-
 #ifdef CONFIG_CXL_REGION
 extern struct device_attribute dev_attr_create_pmem_region;
 extern struct device_attribute dev_attr_create_ram_region;
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index 4461b7a4dc2c..85490afc7930 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -14,6 +14,16 @@
 
 #define EFX_CTPIO_BUFFER_SIZE	SZ_256M
 
+static void efx_release_cxl_region(void *priv_cxl)
+{
+	struct efx_probe_data *probe_data = priv_cxl;
+	struct efx_cxl *cxl = probe_data->cxl;
+
+	probe_data->cxl_pio_initialised = false;
+	iounmap(cxl->ctpio_cxl);
+	cxl_put_root_decoder(cxl->cxlrd);
+}
+
 int efx_cxl_init(struct efx_probe_data *probe_data)
 {
 	struct efx_nic *efx = &probe_data->efx;
@@ -118,6 +128,16 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 		goto err_decoder;
 	}
 
+	cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1,
+					    efx_release_cxl_region,
+					    probe_data);
+	if (IS_ERR(cxl->efx_region)) {
+		pci_err(pci_dev, "CXL accel create region failed\n");
+		cxl_dpa_free(cxl->cxled);
+		rc = PTR_ERR(cxl->efx_region);
+		goto err_decoder;
+	}
+
 	probe_data->cxl = cxl;
 
 	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
@@ -135,6 +155,8 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 void efx_cxl_exit(struct efx_probe_data *probe_data)
 {
 	if (probe_data->cxl) {
+		cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0,
+				   DETACH_INVALIDATE);
 		cxl_dpa_free(probe_data->cxl->cxled);
 		cxl_put_root_decoder(probe_data->cxl->cxlrd);
 	}
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index dbacefff8d60..e82f94921b5b 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -282,4 +282,12 @@ struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
 				     struct cxl_endpoint_decoder **cxled,
 				     int ways, void (*action)(void *),
 				     void *data);
+enum cxl_detach_mode {
+	DETACH_ONLY,
+	DETACH_INVALIDATE,
+};
+
+int cxl_decoder_detach(struct cxl_region *cxlr,
+		       struct cxl_endpoint_decoder *cxled, int pos,
+		       enum cxl_detach_mode mode);
 #endif /* __CXL_CXL_H__ */
-- 
2.34.1

From: Alejandro Lucero

A CXL region struct contains the physical address to work with.

Type2 drivers can create a CXL region but have not access to the
related struct as it is defined as private by the kernel CXL core. Add a
function for getting the cxl region range to be used for mapping such
memory range by a Type2 driver.

Signed-off-by: Alejandro Lucero
Reviewed-by: Zhi Wang
Reviewed-by: Jonathan Cameron
Reviewed-by: Dave Jiang
---
 drivers/cxl/core/region.c | 23 +++++++++++++++++++++++
 include/cxl/cxl.h         |  2 ++
 2 files changed, 25 insertions(+)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e39f272dd445..97b2fb68e029 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2758,6 +2758,29 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 	return ERR_PTR(rc);
 }
 
+/**
+ * cxl_get_region_range - obtain range linked to a CXL region
+ *
+ * @region: a pointer to struct cxl_region
+ * @range: a pointer to a struct range to be set
+ *
+ * Returns 0 or error.
+ */ +int cxl_get_region_range(struct cxl_region *region, struct range *range) +{ + if (WARN_ON_ONCE(!region)) + return -ENODEV; + + if (!region->params.res) + return -ENOSPC; + + range->start = region->params.res->start; + range->end = region->params.res->end; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_region_range, "CXL"); + static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf) { return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index e82f94921b5b..673a0aeec086 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -290,4 +290,6 @@ enum cxl_detach_mode { int cxl_decoder_detach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos, enum cxl_detach_mode mode); +struct range; +int cxl_get_region_range(struct cxl_region *region, struct range *range); #endif /* __CXL_CXL_H__ */ -- 2.34.1 From: Alejandro Lucero A PIO buffer is a region of device memory to which the driver can write a packet for TX, with the device handling the transmit doorbell without requiring a DMA for getting the packet data, which helps reduce latency in certain exchanges. With the CXL mem protocol this latency can be lowered further. With a device supporting CXL and successfully initialised, use the cxl region to map the memory range and use this mapping for PIO buffers. Disable those CXL-based PIO buffers when the callback for potential cxl endpoint removal is invoked by the CXL code.
Signed-off-by: Alejandro Lucero --- drivers/net/ethernet/sfc/ef10.c | 62 ++++++++++++++++++++++++--- drivers/net/ethernet/sfc/efx.h | 1 + drivers/net/ethernet/sfc/efx_cxl.c | 32 ++++++++++++-- drivers/net/ethernet/sfc/net_driver.h | 2 + drivers/net/ethernet/sfc/nic.h | 3 ++ 5 files changed, 90 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 47349c148c0c..7bc854e2d22a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -24,6 +24,7 @@ #include #include #include +#include "efx_cxl.h" /* Hardware control for EF10 architecture including 'Huntington'. */ @@ -106,7 +107,7 @@ static int efx_ef10_get_vf_index(struct efx_nic *efx) static int efx_ef10_init_datapath_caps(struct efx_nic *efx) { - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V4_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V7_OUT_LEN); struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t outlen; int rc; @@ -177,6 +178,12 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) efx->num_mac_stats); } + if (outlen < MC_CMD_GET_CAPABILITIES_V7_OUT_LEN) + nic_data->datapath_caps3 = 0; + else + nic_data->datapath_caps3 = MCDI_DWORD(outbuf, + GET_CAPABILITIES_V7_OUT_FLAGS3); + return 0; } @@ -771,6 +778,18 @@ static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n) return rc; } +#ifdef CONFIG_SFC_CXL +void efx_ef10_disable_piobufs(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_for_each_channel_tx_queue(tx_queue, channel) + tx_queue->piobuf = NULL; +} +#endif + static int efx_ef10_link_piobufs(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; @@ -919,6 +938,9 @@ static void efx_ef10_forget_old_piobufs(struct efx_nic *efx) static void efx_ef10_remove(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; +#ifdef CONFIG_SFC_CXL + struct 
efx_probe_data *probe_data; +#endif int rc; #ifdef CONFIG_SFC_SRIOV @@ -949,7 +971,12 @@ static void efx_ef10_remove(struct efx_nic *efx) efx_mcdi_rx_free_indir_table(efx); +#ifdef CONFIG_SFC_CXL + probe_data = container_of(efx, struct efx_probe_data, efx); + if (nic_data->wc_membase && !probe_data->cxl_pio_in_use) +#else if (nic_data->wc_membase) +#endif iounmap(nic_data->wc_membase); rc = efx_mcdi_free_vis(efx); @@ -1140,6 +1167,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) unsigned int channel_vis, pio_write_vi_base, max_vis; struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int uc_mem_map_size, wc_mem_map_size; +#ifdef CONFIG_SFC_CXL + struct efx_probe_data *probe_data; +#endif void __iomem *membase; int rc; @@ -1263,8 +1293,25 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) iounmap(efx->membase); efx->membase = membase; - /* Set up the WC mapping if needed */ - if (wc_mem_map_size) { + if (!wc_mem_map_size) + goto skip_pio; + + /* Set up the WC mapping */ + +#ifdef CONFIG_SFC_CXL + probe_data = container_of(efx, struct efx_probe_data, efx); + if ((nic_data->datapath_caps3 & + (1 << MC_CMD_GET_CAPABILITIES_V7_OUT_CXL_CONFIG_ENABLE_LBN)) && + probe_data->cxl_pio_initialised) { + /* Using PIO through CXL mapping? 
*/ + nic_data->pio_write_base = probe_data->cxl->ctpio_cxl + + (pio_write_vi_base * efx->vi_stride + + ER_DZ_TX_PIOBUF - uc_mem_map_size); + probe_data->cxl_pio_in_use = true; + } else +#endif + { + /* Using legacy PIO BAR mapping */ nic_data->wc_membase = ioremap_wc(efx->membase_phys + uc_mem_map_size, wc_mem_map_size); @@ -1279,12 +1326,13 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) nic_data->wc_membase + (pio_write_vi_base * efx->vi_stride + ER_DZ_TX_PIOBUF - uc_mem_map_size); - - rc = efx_ef10_link_piobufs(efx); - if (rc) - efx_ef10_free_piobufs(efx); } + rc = efx_ef10_link_piobufs(efx); + if (rc) + efx_ef10_free_piobufs(efx); + +skip_pio: netif_dbg(efx, probe, efx->net_dev, "memory BAR at %pa (virtual %p+%x UC, %p+%x WC)\n", &efx->membase_phys, efx->membase, uc_mem_map_size, diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 45e191686625..37fd1cf96582 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -237,4 +237,5 @@ static inline bool efx_rwsem_assert_write_locked(struct rw_semaphore *sem) int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, bool flush); +void efx_ef10_disable_piobufs(struct efx_nic *efx); #endif /* EFX_EFX_H */ diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 85490afc7930..3dde59003cd9 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -11,16 +11,23 @@ #include "net_driver.h" #include "efx_cxl.h" +#include "efx.h" #define EFX_CTPIO_BUFFER_SIZE SZ_256M static void efx_release_cxl_region(void *priv_cxl) { struct efx_probe_data *probe_data = priv_cxl; + struct efx_nic *efx = &probe_data->efx; struct efx_cxl *cxl = probe_data->cxl; + /* Avoid contention with efx_cxl_exit() */ probe_data->cxl_pio_initialised = false; + + /* Stop the cxl-based piobufs from being used */ + efx_ef10_disable_piobufs(efx); iounmap(cxl->ctpio_cxl); +
cxl_put_root_decoder(cxl->cxlrd); } @@ -30,6 +37,7 @@ int efx_cxl_init(struct efx_probe_data *probe_data) struct pci_dev *pci_dev = efx->pci_dev; resource_size_t max_size; struct efx_cxl *cxl; + struct range range; u16 dvsec; int rc; @@ -133,17 +141,34 @@ int efx_cxl_init(struct efx_probe_data *probe_data) &probe_data); if (IS_ERR(cxl->efx_region)) { pci_err(pci_dev, "CXL accel create region failed"); - cxl_dpa_free(cxl->cxled); rc = PTR_ERR(cxl->efx_region); - goto err_decoder; + goto err_dpa; + } + + rc = cxl_get_region_range(cxl->efx_region, &range); + if (rc) { + pci_err(pci_dev, "CXL getting regions params failed"); + goto err_detach; + } + + cxl->ctpio_cxl = ioremap(range.start, range.end - range.start + 1); + if (!cxl->ctpio_cxl) { + pci_err(pci_dev, "CXL ioremap region (%pra) failed", &range); + rc = -ENOMEM; + goto err_detach; } probe_data->cxl = cxl; + probe_data->cxl_pio_initialised = true; cxl_release_endpoint(cxl->cxlmd, cxl->endpoint); return 0; +err_detach: + cxl_decoder_detach(NULL, cxl->cxled, 0, DETACH_INVALIDATE); +err_dpa: + cxl_dpa_free(cxl->cxled); err_decoder: cxl_put_root_decoder(cxl->cxlrd); err_release: @@ -154,7 +179,8 @@ int efx_cxl_init(struct efx_probe_data *probe_data) void efx_cxl_exit(struct efx_probe_data *probe_data) { - if (probe_data->cxl) { + if (probe_data->cxl_pio_initialised) { + iounmap(probe_data->cxl->ctpio_cxl); cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, DETACH_INVALIDATE); cxl_dpa_free(probe_data->cxl->cxled); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 0e685b8a9980..894b62d6ada9 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1209,6 +1209,7 @@ struct efx_cxl; * @efx: Efx NIC details * @cxl: details of related cxl objects * @cxl_pio_initialised: cxl initialization outcome. 
+ * @cxl_pio_in_use: PIO using CXL mapping */ struct efx_probe_data { struct pci_dev *pci_dev; @@ -1216,6 +1217,7 @@ struct efx_probe_data { #ifdef CONFIG_SFC_CXL struct efx_cxl *cxl; bool cxl_pio_initialised; + bool cxl_pio_in_use; #endif }; diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 9fa5c4c713ab..c87cc9214690 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -152,6 +152,8 @@ enum { * %MC_CMD_GET_CAPABILITIES response) * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of * %MC_CMD_GET_CAPABILITIES response) + * @datapath_caps3: Further Capabilities of datapath firmware (FLAGS3 field of + * %MC_CMD_GET_CAPABILITIES response) * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot @@ -186,6 +188,7 @@ struct efx_ef10_nic_data { bool must_check_datapath_caps; u32 datapath_caps; u32 datapath_caps2; + u32 datapath_caps3; unsigned int rx_dpcpu_fw_id; unsigned int tx_dpcpu_fw_id; bool must_probe_vswitching; -- 2.34.1