From: Manish Honap Check media readiness at probe time and create a CXL memdev for region management. Media/range-active check is performed at probe time to keep the vfio-is-advertised-as-cxl behavior consistent. A pre-committed HDM decoder already implies media is active, so set media_ready directly instead of calling the potentially blocking cxl_await_range_active(). For memdev creation we need to determine capacity before calling devm_cxl_add_memdev(). Read the committed decoder size directly from HDM decoder hardware registers; the CXL core will see the same values when it enumerates decoders inside add_memdev. For firmware uncommitted decoders, handling will be added in a later commit. Signed-off-by: Manish Honap --- drivers/vfio/pci/cxl/vfio_cxl_core.c | 67 +++++++++++++++++++++++++++- drivers/vfio/pci/cxl/vfio_cxl_emu.c | 48 ++++++++++++++++++++ drivers/vfio/pci/cxl/vfio_cxl_priv.h | 2 + 3 files changed, 115 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/cxl/vfio_cxl_core.c b/drivers/vfio/pci/cxl/vfio_cxl_core.c index d2401871489d..15b6c0d75d9e 100644 --- a/drivers/vfio/pci/cxl/vfio_cxl_core.c +++ b/drivers/vfio/pci/cxl/vfio_cxl_core.c @@ -132,6 +132,37 @@ static int vfio_cxl_setup_regs(struct vfio_pci_core_device *vdev) return 0; } +static int vfio_cxl_create_memdev(struct vfio_pci_core_device *vdev, + resource_size_t capacity) +{ + struct vfio_pci_cxl_state *cxl = vdev->cxl; + struct pci_dev *pdev = vdev->pdev; + int ret; + + ret = cxl_set_capacity(&cxl->cxlds, capacity); + if (ret) { + pci_err(pdev, "Failed to set capacity: %d\n", ret); + return ret; + } + + pci_dbg(pdev, "Device capacity: %llu MB (from %s)\n", + capacity >> 20, + cxl->precommitted ? "committed decoder" : "sysfs"); + pci_dbg(pdev, + "vfio_cxl: creating memdev: capacity=0x%llx bytes (%llu MiB)\n", + (unsigned long long)capacity, + (unsigned long long)(capacity >> 20)); + + cxl->cxlmd = devm_cxl_add_memdev(&cxl->cxlds, NULL); + if (IS_ERR(cxl->cxlmd)) { + pci_err(pdev, "Failed to add CXL memdev: %ld\n", + PTR_ERR(cxl->cxlmd)); + return PTR_ERR(cxl->cxlmd); + } + + return 0; +} + int vfio_cxl_create_cxl_region(struct vfio_pci_core_device *vdev, resource_size_t size) { struct vfio_pci_cxl_state *cxl = vdev->cxl; @@ -250,6 +281,7 @@ void vfio_pci_cxl_detect_and_init(struct vfio_pci_core_device *vdev) { struct pci_dev *pdev = vdev->pdev; struct vfio_pci_cxl_state *cxl; + resource_size_t capacity = 0; u16 dvsec; int ret; @@ -282,13 +314,44 @@ void vfio_pci_cxl_detect_and_init(struct vfio_pci_core_device *vdev) goto failed; } + cxl->cxlds.media_ready = !cxl_await_range_active(&cxl->cxlds); + if (!cxl->cxlds.media_ready) { + pci_disable_device(pdev); + pci_err(pdev, "CXL media not ready\n"); + goto regs_failed; + } + + /* + * Take the single authoritative HDM decoder snapshot now that + * MEM_ACTIVE is confirmed and BAR memory is still enabled. Using + * readl() per-dword ensures correct MMIO serialisation and captures + * the final firmware-written values for all fields including SIZE_HIGH, + * which firmware commits to the BAR at MEM_ACTIVE time. + */ + vfio_cxl_reinit_comp_regs(vdev); + pci_disable_device(pdev); - ret = vfio_cxl_create_region_helper(vdev, SZ_256M); - if (ret) + capacity = vfio_cxl_read_committed_decoder_size(vdev); + if (capacity == 0) { + /* + * TODO: Add handling for devices which do not have + * firmware pre-committed decoders + */ + pci_info(pdev, "Uncommitted region size must be configured via sysfs before bind\n"); goto regs_failed; + } cxl->precommitted = true; + cxl->dpa_size = capacity; + + ret = vfio_cxl_create_memdev(vdev, capacity); + if (ret) + goto regs_failed; + + ret = vfio_cxl_create_region_helper(vdev, capacity); + if (ret) + goto regs_failed; return; diff --git a/drivers/vfio/pci/cxl/vfio_cxl_emu.c b/drivers/vfio/pci/cxl/vfio_cxl_emu.c index d5603c80fe51..178a42267642 100644 --- a/drivers/vfio/pci/cxl/vfio_cxl_emu.c +++ b/drivers/vfio/pci/cxl/vfio_cxl_emu.c @@ -300,6 +300,54 @@ int vfio_cxl_setup_virt_regs(struct vfio_pci_core_device *vdev) return 0; } +/* + * vfio_cxl_read_committed_decoder_size - Extract committed DPA capacity from + * comp_reg_virt[]. + * + * Called from probe context after vfio_cxl_reinit_comp_regs() has taken the + * post-MEM_ACTIVE readl() snapshot and patched SIZE_HIGH/SIZE_LOW from DVSEC. + * comp_reg_virt[] is already correct at this point; no hardware access needed. + * + * Returns the committed DPA capacity in bytes, or 0 if the decoder is not + * committed. + */ +resource_size_t +vfio_cxl_read_committed_decoder_size(struct vfio_pci_core_device *vdev) +{ + struct vfio_pci_cxl_state *cxl = vdev->cxl; + struct pci_dev *pdev = vdev->pdev; + resource_size_t capacity; + u32 ctrl, sz_hi, sz_lo; + + if (WARN_ON(!cxl || !cxl->comp_reg_virt)) + return 0; + + ctrl = le32_to_cpu(cxl->comp_reg_virt[CXL_HDM_DECODER0_CTRL_OFFSET(0) / + CXL_REG_SIZE_DWORD]); + sz_hi = le32_to_cpu(cxl->comp_reg_virt[CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(0) / + CXL_REG_SIZE_DWORD]); + sz_lo = le32_to_cpu(cxl->comp_reg_virt[CXL_HDM_DECODER0_SIZE_LOW_OFFSET(0) / + CXL_REG_SIZE_DWORD]); + + if (!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)) { + pci_dbg(pdev, + "vfio_cxl: decoder0 not committed: ctrl=0x%08x\n", + ctrl); + return 0; + } + + capacity = ((resource_size_t)sz_hi << 32) | (sz_lo & GENMASK(31, 28)); + + pci_dbg(pdev, + "vfio_cxl: decoder0 committed: sz_hi=0x%08x sz_lo=0x%08x " + "capacity=0x%llx (%llu MiB)\n", + sz_hi, sz_lo, + (unsigned long long)capacity, + (unsigned long long)(capacity >> 20)); + + return capacity; +} + /* * Called with memory_lock write side held (from vfio_cxl_reactivate_region). * Uses the pre-established hdm_iobase, no ioremap() under the lock, diff --git a/drivers/vfio/pci/cxl/vfio_cxl_priv.h b/drivers/vfio/pci/cxl/vfio_cxl_priv.h index 4f2637874e9d..3ef8d923a7e8 100644 --- a/drivers/vfio/pci/cxl/vfio_cxl_priv.h +++ b/drivers/vfio/pci/cxl/vfio_cxl_priv.h @@ -26,6 +26,7 @@ struct vfio_pci_cxl_state { resource_size_t comp_reg_offset; size_t comp_reg_size; __le32 *comp_reg_virt; + size_t dpa_size; void __iomem *hdm_iobase; u32 hdm_count; int dpa_region_idx; @@ -81,5 +82,6 @@ struct vfio_pci_cxl_state { int vfio_cxl_setup_virt_regs(struct vfio_pci_core_device *vdev); void vfio_cxl_clean_virt_regs(struct vfio_pci_core_device *vdev); void vfio_cxl_reinit_comp_regs(struct vfio_pci_core_device *vdev); +resource_size_t vfio_cxl_read_committed_decoder_size(struct vfio_pci_core_device *vdev); #endif /* __LINUX_VFIO_CXL_PRIV_H */ -- 2.25.1