pcie_tph_get_st_table_loc() incorrectly uses FIELD_GET(), which shifts the field value to bit 0. But the function is designed to return raw PCI_TPH_LOC_* values as defined in the function comment. This causes incorrect ST table location detection. Fix it by using bitwise AND with PCI_TPH_CAP_LOC_MASK to return the unshifted field value matching the function specification. This doesn't make a difference to mlx5_st_create(), the lone external caller, because it only checks for PCI_TPH_LOC_NONE (0), but will be needed for callers that check for PCI_TPH_LOC_CAP or PCI_TPH_LOC_MSIX. Fixes: d2e8a34876ce ("PCI/TPH: Add Steering Tag support") Cc: stable@vger.kernel.org Signed-off-by: Chengwen Feng Reviewed-by: Alex Williamson Reviewed-by: Bjorn Helgaas --- drivers/pci/tph.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 91145e8d9d95..877cf556242b 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -170,7 +170,7 @@ u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev) pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg); - return FIELD_GET(PCI_TPH_CAP_LOC_MASK, reg); + return reg & PCI_TPH_CAP_LOC_MASK; } EXPORT_SYMBOL(pcie_tph_get_st_table_loc); @@ -185,9 +185,6 @@ u16 pcie_tph_get_st_table_size(struct pci_dev *pdev) /* Check ST table location first */ loc = pcie_tph_get_st_table_loc(pdev); - - /* Convert loc to match with PCI_TPH_LOC_* defined in pci_regs.h */ - loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); if (loc != PCI_TPH_LOC_CAP) return 0; @@ -316,8 +313,6 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) set_ctrl_reg_req_en(pdev, PCI_TPH_REQ_DISABLE); loc = pcie_tph_get_st_table_loc(pdev); - /* Convert loc to match with PCI_TPH_LOC_* */ - loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); switch (loc) { case PCI_TPH_LOC_MSIX: -- 2.17.1 Export the helper to retrieve supported PCIe TPH steering tag modes so that drivers like VFIO can query and expose device 
capabilities to userspace. Add stub functions for pcie_tph_get_st_table_size() and pcie_tph_get_st_table_loc() when !CONFIG_PCIE_TPH. Add tph_cap validation for pcie_tph_get_st_modes() and pcie_tph_get_st_table_loc() to prevent invalid PCI configuration space access when TPH is not supported. Signed-off-by: Chengwen Feng Acked-by: Bjorn Helgaas --- drivers/pci/tph.c | 19 +++++++++++++++++-- include/linux/pci-tph.h | 7 +++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 877cf556242b..ba31b010f67a 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -145,15 +145,27 @@ static void set_ctrl_reg_req_en(struct pci_dev *pdev, u8 req_type) pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, reg); } -static u8 get_st_modes(struct pci_dev *pdev) +/** + * pcie_tph_get_st_modes - Get supported Steering Tag modes + * @pdev: PCI device to query + * + * Return: + * Bitmask of supported ST modes (PCI_TPH_CAP_ST_NS, PCI_TPH_CAP_ST_IV, + * PCI_TPH_CAP_ST_DS) + */ +u8 pcie_tph_get_st_modes(struct pci_dev *pdev) { u32 reg; + if (!pdev->tph_cap) + return 0; + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg); reg &= PCI_TPH_CAP_ST_NS | PCI_TPH_CAP_ST_IV | PCI_TPH_CAP_ST_DS; return reg; } +EXPORT_SYMBOL(pcie_tph_get_st_modes); /** * pcie_tph_get_st_table_loc - Return the device's ST table location @@ -168,6 +180,9 @@ u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev) { u32 reg; + if (!pdev->tph_cap) + return PCI_TPH_LOC_NONE; + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg); return reg & PCI_TPH_CAP_LOC_MASK; @@ -395,7 +410,7 @@ int pcie_enable_tph(struct pci_dev *pdev, int mode) /* Sanitize and check ST mode compatibility */ mode &= PCI_TPH_CTRL_MODE_SEL_MASK; - dev_modes = get_st_modes(pdev); + dev_modes = pcie_tph_get_st_modes(pdev); if (!((1 << mode) & dev_modes)) return -EINVAL; diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h index be68cd17f2f8..5772d48ea444 100644 --- 
a/include/linux/pci-tph.h +++ b/include/linux/pci-tph.h @@ -30,6 +30,7 @@ void pcie_disable_tph(struct pci_dev *pdev); int pcie_enable_tph(struct pci_dev *pdev, int mode); u16 pcie_tph_get_st_table_size(struct pci_dev *pdev); u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev); +u8 pcie_tph_get_st_modes(struct pci_dev *pdev); #else static inline int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) @@ -41,6 +42,12 @@ static inline int pcie_tph_get_cpu_st(struct pci_dev *dev, static inline void pcie_disable_tph(struct pci_dev *pdev) { } static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) { return -EINVAL; } +static inline u16 pcie_tph_get_st_table_size(struct pci_dev *pdev) +{ return 0; } +static inline u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev) +{ return 0; } +static inline u8 pcie_tph_get_st_modes(struct pci_dev *pdev) +{ return 0; } #endif #endif /* LINUX_PCI_TPH_H */ -- 2.17.1 Add a helper to query enabled TPH mode on a PCI device. This is useful for drivers like VFIO-PCI that need to validate TPH state before allowing access to steering tag tables. Signed-off-by: Chengwen Feng --- drivers/pci/tph.c | 12 ++++++++++++ include/linux/pci-tph.h | 3 +++ 2 files changed, 15 insertions(+) diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index ba31b010f67a..91c1e83410a3 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -451,6 +451,18 @@ int pcie_enable_tph(struct pci_dev *pdev, int mode) } EXPORT_SYMBOL(pcie_enable_tph); +/** + * pcie_tph_enabled_mode - Get current enabled TPH mode + * @pdev: PCI device + * + * Return the enabled TPH mode (IV/DS) or 0 if disabled. + */ +int pcie_tph_enabled_mode(struct pci_dev *pdev) +{ + return pdev->tph_enabled ? 
pdev->tph_mode : 0; +} +EXPORT_SYMBOL(pcie_tph_enabled_mode); + void pci_restore_tph_state(struct pci_dev *pdev) { struct pci_cap_saved_state *save_state; diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h index 5772d48ea444..28d0fa762146 100644 --- a/include/linux/pci-tph.h +++ b/include/linux/pci-tph.h @@ -28,6 +28,7 @@ int pcie_tph_get_cpu_st(struct pci_dev *dev, unsigned int cpu, u16 *tag); void pcie_disable_tph(struct pci_dev *pdev); int pcie_enable_tph(struct pci_dev *pdev, int mode); +int pcie_tph_enabled_mode(struct pci_dev *pdev); u16 pcie_tph_get_st_table_size(struct pci_dev *pdev); u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev); u8 pcie_tph_get_st_modes(struct pci_dev *pdev); @@ -42,6 +43,8 @@ static inline int pcie_tph_get_cpu_st(struct pci_dev *dev, static inline void pcie_disable_tph(struct pci_dev *pdev) { } static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) { return -EINVAL; } +static inline int pcie_tph_enabled_mode(struct pci_dev *pdev) +{ return 0; } static inline u16 pcie_tph_get_st_table_size(struct pci_dev *pdev) { return 0; } static inline u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev) -- 2.17.1 Add support for virtualizing the PCIe TPH (Transaction Processing Hints) control register. TPH may break platform isolation, so add a module parameter "enable_unsafe_tph" to allow administrators to globally control this feature. TPH control register writes are mediated to only allow valid mode settings, and TPH is automatically disabled when VFIO takes ownership of the device or when userspace closes the device file descriptor. 
Signed-off-by: Chengwen Feng --- drivers/vfio/pci/vfio_pci.c | 13 +++++++++++- drivers/vfio/pci/vfio_pci_config.c | 33 ++++++++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_core.c | 12 ++++++++++- include/linux/vfio_pci_core.h | 3 ++- 4 files changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 0c771064c0b8..6d73668459cf 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -60,6 +60,12 @@ static bool disable_denylist; module_param(disable_denylist, bool, 0444); MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users."); +#ifdef CONFIG_PCIE_TPH +static bool enable_unsafe_tph; +module_param(enable_unsafe_tph, bool, 0444); +MODULE_PARM_DESC(enable_unsafe_tph, "Enable PCIe TPH (Transaction Processing Hints) support. It may break platform isolation. If you do not know what this is for, step away. 
(default: false)"); +#endif + static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) { switch (pdev->vendor) { @@ -257,12 +263,17 @@ static int __init vfio_pci_init(void) { int ret; bool is_disable_vga = true; + bool is_enable_unsafe_tph = false; #ifdef CONFIG_VFIO_PCI_VGA is_disable_vga = disable_vga; #endif +#ifdef CONFIG_PCIE_TPH + is_enable_unsafe_tph = enable_unsafe_tph; +#endif - vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3); + vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3, + is_enable_unsafe_tph); /* Register and scan for devices */ ret = pci_register_driver(&vfio_pci_driver); diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index a10ed733f0e3..efb413ce7817 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -35,6 +36,8 @@ ((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \ (offset >= PCI_ROM_ADDRESS && offset < PCI_ROM_ADDRESS + 4)) +extern bool enable_unsafe_tph; + /* * Lengths of PCI Config Capabilities * 0: Removed from the user visible capability list @@ -313,6 +316,35 @@ static int vfio_virt_config_read(struct vfio_pci_core_device *vdev, int pos, return count; } +static int vfio_pci_tph_config_write(struct vfio_pci_core_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + u32 data = le32_to_cpu(val); + int ret; + + if (!enable_unsafe_tph) + return -EOPNOTSUPP; + + if (count != 4 || offset != PCI_TPH_CTRL) + return -EINVAL; + + /* Only permit write TPH mode. 
*/ + data &= PCI_TPH_CTRL_MODE_SEL_MASK; + if (data > PCI_TPH_ST_DS_MODE) + return -EINVAL; + + if (data == PCI_TPH_ST_NS_MODE) { + pcie_disable_tph(vdev->pdev); + return 4; + } + + ret = pcie_enable_tph(vdev->pdev, data); + if (ret) + return -EIO; + return 4; +} + static struct perm_bits direct_ro_perms = { .readfn = vfio_direct_config_read, }; @@ -1121,6 +1153,7 @@ int __init vfio_pci_init_perm_bits(void) ret |= init_pci_ext_cap_err_perm(&ecap_perms[PCI_EXT_CAP_ID_ERR]); ret |= init_pci_ext_cap_pwr_perm(&ecap_perms[PCI_EXT_CAP_ID_PWR]); ecap_perms[PCI_EXT_CAP_ID_VNDR].writefn = vfio_raw_config_write; + ecap_perms[PCI_EXT_CAP_ID_TPH].writefn = vfio_pci_tph_config_write; ecap_perms[PCI_EXT_CAP_ID_DVSEC].writefn = vfio_raw_config_write; if (ret) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 3f8d093aacf8..cc13fc8eea9d 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -29,6 +29,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_EEH) #include #endif @@ -41,6 +42,7 @@ static bool nointxmask; static bool disable_vga; static bool disable_idle_d3; +bool enable_unsafe_tph; static void vfio_pci_eventfd_rcu_free(struct rcu_head *rcu) { @@ -736,6 +738,9 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) #endif vfio_pci_dma_buf_cleanup(vdev); + /* Disable TPH when userspace closes the device FD */ + pcie_disable_tph(vdev->pdev); + vfio_pci_core_disable(vdev); mutex_lock(&vdev->igate); @@ -2205,6 +2210,9 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) if (!disable_idle_d3) pm_runtime_put(dev); + /* Disable TPH when taking over ownership of the device */ + pcie_disable_tph(pdev); + ret = vfio_register_group_dev(&vdev->vdev); if (ret) goto out_power; @@ -2570,11 +2578,13 @@ static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set) } void vfio_pci_core_set_params(bool is_nointxmask, bool is_disable_vga, - bool is_disable_idle_d3) + bool 
is_disable_idle_d3, + bool is_enable_unsafe_tph) { nointxmask = is_nointxmask; disable_vga = is_disable_vga; disable_idle_d3 = is_disable_idle_d3; + enable_unsafe_tph = is_enable_unsafe_tph; } EXPORT_SYMBOL_GPL(vfio_pci_core_set_params); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 2ebba746c18f..33e7cd1dae87 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -157,7 +157,8 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, const struct vfio_pci_regops *ops, size_t size, u32 flags, void *data); void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga, - bool is_disable_idle_d3); + bool is_disable_idle_d3, + bool is_enable_unsafe_tph); void vfio_pci_core_close_device(struct vfio_device *core_vdev); int vfio_pci_core_init_dev(struct vfio_device *core_vdev); void vfio_pci_core_release_dev(struct vfio_device *core_vdev); -- 2.17.1 Add VFIO_DEVICE_FEATURE_TPH_ST to allow userspace to manage PCIe TPH Steering Tag entries. SET: Program contiguous ST entries when ST table resides in TPH Capability or MSI-X table. U32_MAX CPU ID clears the corresponding ST entry. GET: Retrieve ST values per CPU ID, only available in DS mode. Both operations are only valid when TPH is enabled on the device. 
Signed-off-by: Chengwen Feng --- drivers/vfio/pci/vfio_pci_core.c | 87 ++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 41 +++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index cc13fc8eea9d..298e7dd136fd 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1521,6 +1521,91 @@ static int vfio_pci_core_feature_token(struct vfio_pci_core_device *vdev, return 0; } +static int vfio_pci_core_feature_tph_st(struct vfio_pci_core_device *vdev, + u32 flags, + struct vfio_device_feature_tph_st *arg, + size_t argsz) +{ + bool is_set = !!(flags & VFIO_DEVICE_FEATURE_SET); + struct vfio_device_feature_tph_st tph_st; + struct pci_dev *pdev = vdev->pdev; + enum tph_mem_type mtype; + int i, j, ret; + u32 *cpus; + u16 st; + + if (!enable_unsafe_tph || + pcie_tph_enabled_mode(pdev) == PCI_TPH_ST_NS_MODE) + return -EOPNOTSUPP; + if (!is_set && pcie_tph_enabled_mode(pdev) != PCI_TPH_ST_DS_MODE) + return -EOPNOTSUPP; + if (is_set && pcie_tph_get_st_table_loc(pdev) == PCI_TPH_LOC_NONE) + return -EOPNOTSUPP; + + ret = vfio_check_feature(flags, argsz, + VFIO_DEVICE_FEATURE_GET | + VFIO_DEVICE_FEATURE_SET, + sizeof(tph_st)); + if (ret != 1) + return -EINVAL; + + if (copy_from_user(&tph_st, arg, sizeof(tph_st))) + return -EFAULT; + + if (tph_st.count == 0 || tph_st.count > VFIO_TPH_ST_MAX_COUNT || + tph_st.flags > VFIO_TPH_ST_MEM_TYPE_PM) + return -EINVAL; + if (is_set && (tph_st.index >= VFIO_TPH_ST_MAX_COUNT || + tph_st.index + tph_st.count > VFIO_TPH_ST_MAX_COUNT)) + return -EINVAL; + if (!is_set && tph_st.index != 0) + return -EINVAL; + + cpus = memdup_array_user(&arg->data, tph_st.count, sizeof(*cpus)); + if (IS_ERR(cpus)) + return PTR_ERR(cpus); + + mtype = tph_st.flags & VFIO_TPH_ST_MEM_TYPE_PM ? 
TPH_MEM_TYPE_PM : + TPH_MEM_TYPE_VM; + if (!is_set) { + for (i = 0; i < tph_st.count; i++) { + ret = pcie_tph_get_cpu_st(pdev, mtype, cpus[i], &st); + if (ret) + goto out; + cpus[i] = st; + } + goto out; + } + + for (i = 0; i < tph_st.count; i++) { + if (cpus[i] == U32_MAX) { + ret = pcie_tph_set_st_entry(pdev, tph_st.index + i, 0); + if (ret) + goto out; + continue; + } + + ret = pcie_tph_get_cpu_st(pdev, mtype, cpus[i], &st); + if (ret) + goto out; + ret = pcie_tph_set_st_entry(pdev, tph_st.index + i, st); + if (ret) + goto out; + } + +out: + if (!is_set && !ret) + ret = copy_to_user(&arg->data, cpus, + tph_st.count * sizeof(*cpus)); + if (is_set && ret) { + /* Roll back previously programmed entries to 0 */ + for (j = 0; j < i; j++) + pcie_tph_set_st_entry(pdev, tph_st.index + j, 0); + } + kfree(cpus); + return ret; +} + int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) { @@ -1539,6 +1624,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags, return vfio_pci_core_feature_token(vdev, flags, arg, argsz); case VFIO_DEVICE_FEATURE_DMA_BUF: return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz); + case VFIO_DEVICE_FEATURE_TPH_ST: + return vfio_pci_core_feature_tph_st(vdev, flags, arg, argsz); default: return -ENOTTY; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 5de618a3a5ee..aca39d4e5307 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1534,6 +1534,47 @@ struct vfio_device_feature_dma_buf { */ #define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 12 +/** + * VFIO_DEVICE_FEATURE_TPH_ST - Get/Set PCIe TPH Steering Tag (ST) entries + * + * Provides userspace interface to manage PCIe TPH ST table entries. + * This feature is only available when device TPH is enabled. + * + * Upon VFIO_DEVICE_FEATURE_SET: + * Program contiguous ST table entries from the starting @index. 
+ * Valid only for hardware with ST table located in TPH Capability + * space or MSI-X table. If an entry CPU ID is specified as U32_MAX, + * the corresponding ST entry will be cleared. @index and @count define + * the contiguous entry range to be programmed. + * If any entry programming fails, the operation will roll back and + * clear all entries that were successfully programmed before the error. + * + * Upon VFIO_DEVICE_FEATURE_GET: + * Retrieve the ST value mapped to each given CPU ID in the @data array. + * Userspace fills @data with CPU ID array, the interface returns each + * CPU's corresponding ST value back in place. + * Valid only when TPH DS mode is enabled. + * + * @flags: Operation flags (VFIO_TPH_ST_MEM_TYPE_*). + * @index: Starting ST entry index, only valid for FEATURE_SET. + * @count: Number of contiguous entries to access. + * @data: Array of CPU IDs for both SET and GET. On SET it programs ST for + * each CPU; on GET it returns the mapped ST value of each CPU. + * + * This feature is gated by enable_unsafe_tph module parameter. + */ +#define VFIO_DEVICE_FEATURE_TPH_ST 13 + +struct vfio_device_feature_tph_st { + __u32 flags; +#define VFIO_TPH_ST_MEM_TYPE_VM (0U << 0) +#define VFIO_TPH_ST_MEM_TYPE_PM (1U << 0) + __u16 index; + __u16 count; +#define VFIO_TPH_ST_MAX_COUNT 2048 + __u32 data[] __counted_by(count); +}; + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- 2.17.1