pcie_tph_get_st_table_loc() incorrectly uses FIELD_GET(), which shifts the field value to bit 0. But the function is designed to return raw PCI_TPH_LOC_* values as defined in the function comment. This causes incorrect ST table location detection. Fix it by using bitwise AND with PCI_TPH_CAP_LOC_MASK to return the unshifted field value matching the function specification. While this change appears to be a no-op within tph.c, the external caller mlx5_st_create() relies on the documented function behavior, making this fix necessary. Fixes: d2e8a34876ce ("PCI/TPH: Add Steering Tag support") Cc: stable@vger.kernel.org Signed-off-by: Chengwen Feng Reviewed-by: Alex Williamson --- drivers/pci/tph.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 91145e8d9d95..f17b74b5fb1e 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -170,7 +170,7 @@ u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev) pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg); - return FIELD_GET(PCI_TPH_CAP_LOC_MASK, reg); + return reg & PCI_TPH_CAP_LOC_MASK; } EXPORT_SYMBOL(pcie_tph_get_st_table_loc); @@ -183,11 +183,7 @@ u16 pcie_tph_get_st_table_size(struct pci_dev *pdev) u32 reg; u32 loc; - /* Check ST table location first */ loc = pcie_tph_get_st_table_loc(pdev); - - /* Convert loc to match with PCI_TPH_LOC_* defined in pci_regs.h */ - loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); if (loc != PCI_TPH_LOC_CAP) return 0; @@ -316,8 +312,6 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) set_ctrl_reg_req_en(pdev, PCI_TPH_REQ_DISABLE); loc = pcie_tph_get_st_table_loc(pdev); - /* Convert loc to match with PCI_TPH_LOC_* */ - loc = FIELD_PREP(PCI_TPH_CAP_LOC_MASK, loc); switch (loc) { case PCI_TPH_LOC_MSIX: -- 2.17.1 Export the helper to retrieve supported PCIe TPH steering tag modes so that drivers like VFIO can query and expose device capabilities to userspace. 
Add stub functions for pcie_tph_get_st_table_size() and pcie_tph_get_st_table_loc() when !CONFIG_PCIE_TPH. Add tph_cap validation for pcie_tph_get_st_modes() and pcie_tph_get_st_table_loc() to prevent invalid PCI configuration space access when TPH is not supported. Signed-off-by: Chengwen Feng Acked-by: Bjorn Helgaas --- drivers/pci/tph.c | 20 ++++++++++++++++++-- include/linux/pci-tph.h | 7 +++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index f17b74b5fb1e..ba31b010f67a 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -145,15 +145,27 @@ static void set_ctrl_reg_req_en(struct pci_dev *pdev, u8 req_type) pci_write_config_dword(pdev, pdev->tph_cap + PCI_TPH_CTRL, reg); } -static u8 get_st_modes(struct pci_dev *pdev) +/** + * pcie_tph_get_st_modes - Get supported Steering Tag modes + * @pdev: PCI device to query + * + * Return: + * Bitmask of supported ST modes (PCI_TPH_CAP_ST_NS, PCI_TPH_CAP_ST_IV, + * PCI_TPH_CAP_ST_DS) + */ +u8 pcie_tph_get_st_modes(struct pci_dev *pdev) { u32 reg; + if (!pdev->tph_cap) + return 0; + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg); reg &= PCI_TPH_CAP_ST_NS | PCI_TPH_CAP_ST_IV | PCI_TPH_CAP_ST_DS; return reg; } +EXPORT_SYMBOL(pcie_tph_get_st_modes); /** * pcie_tph_get_st_table_loc - Return the device's ST table location @@ -168,6 +180,9 @@ u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev) { u32 reg; + if (!pdev->tph_cap) + return PCI_TPH_LOC_NONE; + pci_read_config_dword(pdev, pdev->tph_cap + PCI_TPH_CAP, &reg); return reg & PCI_TPH_CAP_LOC_MASK; @@ -183,6 +198,7 @@ u16 pcie_tph_get_st_table_size(struct pci_dev *pdev) u32 reg; u32 loc; + /* Check ST table location first */ loc = pcie_tph_get_st_table_loc(pdev); if (loc != PCI_TPH_LOC_CAP) return 0; @@ -394,7 +410,7 @@ int pcie_enable_tph(struct pci_dev *pdev, int mode) /* Sanitize and check ST mode compatibility */ mode &= PCI_TPH_CTRL_MODE_SEL_MASK; - dev_modes = get_st_modes(pdev); + dev_modes = 
pcie_tph_get_st_modes(pdev); if (!((1 << mode) & dev_modes)) return -EINVAL; diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h index be68cd17f2f8..586c75b19e01 100644 --- a/include/linux/pci-tph.h +++ b/include/linux/pci-tph.h @@ -30,6 +30,7 @@ void pcie_disable_tph(struct pci_dev *pdev); int pcie_enable_tph(struct pci_dev *pdev, int mode); u16 pcie_tph_get_st_table_size(struct pci_dev *pdev); u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev); +u8 pcie_tph_get_st_modes(struct pci_dev *pdev); #else static inline int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) @@ -41,6 +42,12 @@ static inline int pcie_tph_get_cpu_st(struct pci_dev *dev, static inline void pcie_disable_tph(struct pci_dev *pdev) { } static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) { return -EINVAL; } +static inline u16 pcie_tph_get_st_table_size(struct pci_dev *pdev) +{ return 0; } +static inline u32 pcie_tph_get_st_table_loc(struct pci_dev *pdev) +{ return 0x7FF; /* Values that do not appear in normal case */ } +static inline u8 pcie_tph_get_st_modes(struct pci_dev *pdev) +{ return 0; } #endif #endif /* LINUX_PCI_TPH_H */ -- 2.17.1 Add VFIO_DEVICE_PCI_TPH IOCTL to allow userspace to query device TPH capabilities, supported modes, and steering tag table information. Add module parameter 'enable_unsafe_tph_ds_mode' to restrict unsafe device-specific TPH mode to trusted userspace only. 
Signed-off-by: Chengwen Feng --- drivers/vfio/pci/vfio_pci.c | 13 ++- drivers/vfio/pci/vfio_pci_core.c | 55 ++++++++++++- include/linux/vfio_pci_core.h | 3 +- include/uapi/linux/vfio.h | 131 +++++++++++++++++++++++++++++++ 4 files changed, 199 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 0c771064c0b8..40bf5aa9fd0b 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -60,6 +60,12 @@ static bool disable_denylist; module_param(disable_denylist, bool, 0444); MODULE_PARM_DESC(disable_denylist, "Disable use of device denylist. Disabling the denylist allows binding to devices with known errata that may lead to exploitable stability or security issues when accessed by untrusted users."); +#ifdef CONFIG_PCIE_TPH +static bool enable_unsafe_tph_ds_mode; +module_param(enable_unsafe_tph_ds_mode, bool, 0444); +MODULE_PARM_DESC(enable_unsafe_tph_ds_mode, "Enable UNSAFE TPH device-specific (DS) mode. This mode provides weak isolation, cannot be safely used for virtual machines. If you do not know what this is for, step away. 
(default: false)"); +#endif + static bool vfio_pci_dev_in_denylist(struct pci_dev *pdev) { switch (pdev->vendor) { @@ -257,12 +263,17 @@ static int __init vfio_pci_init(void) { int ret; bool is_disable_vga = true; + bool is_enable_unsafe_tph_ds_mode = false; #ifdef CONFIG_VFIO_PCI_VGA is_disable_vga = disable_vga; #endif +#ifdef CONFIG_PCIE_TPH + is_enable_unsafe_tph_ds_mode = enable_unsafe_tph_ds_mode; +#endif - vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3); + vfio_pci_core_set_params(nointxmask, is_disable_vga, disable_idle_d3, + is_enable_unsafe_tph_ds_mode); /* Register and scan for devices */ ret = pci_register_driver(&vfio_pci_driver); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 3f8d093aacf8..d98d04bad4a3 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -29,6 +29,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_EEH) #include #endif @@ -41,6 +42,7 @@ static bool nointxmask; static bool disable_vga; static bool disable_idle_d3; +static bool enable_unsafe_tph_ds_mode; static void vfio_pci_eventfd_rcu_free(struct rcu_head *rcu) { @@ -1461,6 +1463,53 @@ static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev, ioeventfd.fd); } +static int vfio_pci_tph_get_cap(struct vfio_pci_core_device *vdev, + struct vfio_device_pci_tph_op *op, + void __user *uarg) +{ + struct pci_dev *pdev = vdev->pdev; + u8 mode = pcie_tph_get_st_modes(pdev); + struct vfio_pci_tph_cap cap = {0}; + + if (op->argsz < offsetof(struct vfio_device_pci_tph_op, cap) + + sizeof(struct vfio_pci_tph_cap)) + return -EINVAL; + + if (mode == 0 || mode == PCI_TPH_CAP_ST_NS) + return -EOPNOTSUPP; + + if (mode & PCI_TPH_CAP_ST_IV) + cap.supported_modes |= VFIO_PCI_TPH_MODE_IV; + if (mode & PCI_TPH_CAP_ST_DS) + cap.supported_modes |= VFIO_PCI_TPH_MODE_DS; + + if (pcie_tph_get_st_table_loc(pdev) != PCI_TPH_LOC_NONE) + cap.st_table_sz = pcie_tph_get_st_table_size(pdev); + + if 
(copy_to_user(uarg, &cap, sizeof(cap))) + return -EFAULT; + + return 0; +} + +static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev, + void __user *uarg) +{ + struct vfio_device_pci_tph_op op = {0}; + size_t minsz = sizeof(op.argsz) + sizeof(op.op); + + if (copy_from_user(&op, uarg, minsz)) + return -EFAULT; + + switch (op.op) { + case VFIO_PCI_TPH_GET_CAP: + return vfio_pci_tph_get_cap(vdev, &op, uarg + minsz); + default: + /* Other ops are not implemented yet */ + return -EINVAL; + } +} + long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, unsigned long arg) { @@ -1483,6 +1532,8 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd, return vfio_pci_ioctl_reset(vdev, uarg); case VFIO_DEVICE_SET_IRQS: return vfio_pci_ioctl_set_irqs(vdev, uarg); + case VFIO_DEVICE_PCI_TPH: + return vfio_pci_ioctl_tph(vdev, uarg); default: return -ENOTTY; } @@ -2570,11 +2621,13 @@ static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set) } void vfio_pci_core_set_params(bool is_nointxmask, bool is_disable_vga, - bool is_disable_idle_d3) + bool is_disable_idle_d3, + bool is_enable_unsafe_tph_ds_mode) { nointxmask = is_nointxmask; disable_vga = is_disable_vga; disable_idle_d3 = is_disable_idle_d3; + enable_unsafe_tph_ds_mode = is_enable_unsafe_tph_ds_mode; } EXPORT_SYMBOL_GPL(vfio_pci_core_set_params); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 2ebba746c18f..5af2a2e04ca7 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -157,7 +157,8 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, const struct vfio_pci_regops *ops, size_t size, u32 flags, void *data); void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga, - bool is_disable_idle_d3); + bool is_disable_idle_d3, + bool is_enable_unsafe_tph_ds_mode); void vfio_pci_core_close_device(struct vfio_device *core_vdev); int vfio_pci_core_init_dev(struct vfio_device 
*core_vdev); void vfio_pci_core_release_dev(struct vfio_device *core_vdev); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 5de618a3a5ee..f899521e52c6 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -1321,6 +1321,137 @@ struct vfio_precopy_info { #define VFIO_MIG_GET_PRECOPY_INFO _IO(VFIO_TYPE, VFIO_BASE + 21) +/** + * struct vfio_pci_tph_cap - PCIe TPH capability information + * @supported_modes: Supported TPH operating modes + * @st_table_sz: Number of entries in ST table; 0 means no ST table + * @reserved: Must be zero + * + * Used with VFIO_PCI_TPH_GET_CAP operation to return device + * TLP Processing Hints (TPH) capabilities to userspace. + */ +struct vfio_pci_tph_cap { + __u8 supported_modes; +#define VFIO_PCI_TPH_MODE_IV (1u << 0) /* Interrupt vector */ +#define VFIO_PCI_TPH_MODE_DS (1u << 1) /* Device specific */ + __u8 reserved0; + __u16 st_table_sz; + __u32 reserved; +}; + +/** + * struct vfio_pci_tph_ctrl - TPH enable control structure + * @mode: Selected TPH operating mode (VFIO_PCI_TPH_MODE_*) + * @reserved: Must be zero + * + * Used with VFIO_PCI_TPH_ENABLE operation to specify the + * operating mode when enabling TPH on the device. + */ +struct vfio_pci_tph_ctrl { + __u8 mode; + __u8 reserved[7]; +}; + +/** + * struct vfio_pci_tph_entry - Single TPH steering tag entry + * @cpu: CPU identifier for steering tag calculation + * @mem_type: Memory type (VFIO_PCI_TPH_MEM_TYPE_*) + * @reserved0: Must be zero + * @index: ST table index for programming + * @st: Unused for SET_ST + * @reserved1: Must be zero + * + * For VFIO_PCI_TPH_GET_ST: + * Userspace sets @cpu and @mem_type; kernel returns @st. + * + * For VFIO_PCI_TPH_SET_ST: + * Userspace sets @index, @cpu, and @mem_type. + * Kernel internally computes the steering tag and programs + * it into the specified @index. + * + * If @cpu == U32_MAX, kernel clears the steering tag at + * the specified @index. 
+ */ +struct vfio_pci_tph_entry { + __u32 cpu; + __u8 mem_type; +#define VFIO_PCI_TPH_MEM_TYPE_VM 0 +#define VFIO_PCI_TPH_MEM_TYPE_PM 1 + __u8 reserved0; + __u16 index; + __u16 st; + __u16 reserved1; +}; + +/** + * struct vfio_pci_tph_st - Batch steering tag request + * @count: Number of entries in the array + * @reserved: Must be zero + * @ents: Flexible array of steering tag entries + * + * Container structure for batch get/set operations. + * Used with both VFIO_PCI_TPH_GET_ST and VFIO_PCI_TPH_SET_ST. + */ +struct vfio_pci_tph_st { + __u32 count; + __u32 reserved; + struct vfio_pci_tph_entry ents[]; +#define VFIO_PCI_TPH_MAX_ENTRIES 2048 +}; + +/** + * struct vfio_device_pci_tph_op - Argument for VFIO_DEVICE_PCI_TPH + * @argsz: User allocated size of this structure + * @op: TPH operation (VFIO_PCI_TPH_*) + * @cap: Capability data for GET_CAP + * @ctrl: Control data for ENABLE + * @st: Batch entry data for GET_ST/SET_ST + * + * @argsz must be set by the user to the size of the structure + * being executed. Kernel validates input and returns data + * only within the specified size. + * + * Operations: + * - VFIO_PCI_TPH_GET_CAP: Query device TPH capabilities. + * - VFIO_PCI_TPH_ENABLE: Enable TPH using mode from &ctrl. + * - VFIO_PCI_TPH_DISABLE: Disable TPH on the device. + * - VFIO_PCI_TPH_GET_ST: Retrieve CPU's steering tags. + * Valid only in Device-Specific mode when + * no ST table is present. + * - VFIO_PCI_TPH_SET_ST: Program steering tags into device table. + * If any entry fails, previously programmed entries + * are rolled back to 0 before returning error. 
+ */ +struct vfio_device_pci_tph_op { + __u32 argsz; + __u32 op; +#define VFIO_PCI_TPH_GET_CAP 0 +#define VFIO_PCI_TPH_ENABLE 1 +#define VFIO_PCI_TPH_DISABLE 2 +#define VFIO_PCI_TPH_GET_ST 3 +#define VFIO_PCI_TPH_SET_ST 4 + union { + struct vfio_pci_tph_cap cap; + struct vfio_pci_tph_ctrl ctrl; + struct vfio_pci_tph_st st; + }; +}; + +/** + * VFIO_DEVICE_PCI_TPH - _IO(VFIO_TYPE, VFIO_BASE + 22) + * + * IOCTL for managing PCIe TLP Processing Hints (TPH) on + * a VFIO-assigned PCI device. Provides operations to query + * device capabilities, enable/disable TPH, retrieve CPU's + * steering tags, and program steering tag tables. + * + * Return: 0 on success, negative errno on failure. + * -EOPNOTSUPP: Operation not supported + * -ENODEV: Device or required functionality not present + * -EINVAL: Invalid argument or TPH not supported + */ +#define VFIO_DEVICE_PCI_TPH _IO(VFIO_TYPE, VFIO_BASE + 22) + /* * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power * state with the platform-based power management. Device use of lower power -- 2.17.1 Add support to enable and disable TPH function with mode selection. Restrict unsafe device-specific TPH mode to be allowed only when module parameter enable_unsafe_tph_ds_mode=1 is set. Disable TPH when taking over ownership of the device. 
Signed-off-by: Chengwen Feng --- drivers/vfio/pci/vfio_pci_core.c | 43 ++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index d98d04bad4a3..69f666d20c4a 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1492,6 +1492,43 @@ static int vfio_pci_tph_get_cap(struct vfio_pci_core_device *vdev, return 0; } +static int vfio_pci_tph_enable(struct vfio_pci_core_device *vdev, + struct vfio_device_pci_tph_op *op, + void __user *uarg) +{ + struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_tph_ctrl ctrl; + int mode; + + if (op->argsz < offsetofend(struct vfio_device_pci_tph_op, ctrl) + + sizeof(struct vfio_pci_tph_ctrl)) + return -EINVAL; + + if (copy_from_user(&ctrl, uarg, sizeof(ctrl))) + return -EFAULT; + + if (ctrl.mode != VFIO_PCI_TPH_MODE_IV && + ctrl.mode != VFIO_PCI_TPH_MODE_DS) + return -EINVAL; + + if (ctrl.mode == VFIO_PCI_TPH_MODE_DS && !enable_unsafe_tph_ds_mode) + return -EOPNOTSUPP; + + /* Reserved must be zero */ + if (memchr_inv(ctrl.reserved, 0, sizeof(ctrl.reserved))) + return -EINVAL; + + mode = (ctrl.mode == VFIO_PCI_TPH_MODE_IV) ? 
PCI_TPH_ST_IV_MODE : + PCI_TPH_ST_DS_MODE; + return pcie_enable_tph(pdev, mode); +} + +static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev) +{ + pcie_disable_tph(vdev->pdev); + return 0; +} + static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev, void __user *uarg) { @@ -1504,6 +1541,10 @@ static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev, switch (op.op) { case VFIO_PCI_TPH_GET_CAP: return vfio_pci_tph_get_cap(vdev, &op, uarg + minsz); + case VFIO_PCI_TPH_ENABLE: + return vfio_pci_tph_enable(vdev, &op, uarg + minsz); + case VFIO_PCI_TPH_DISABLE: + return vfio_pci_tph_disable(vdev); default: /* Other ops are not implemented yet */ return -EINVAL; @@ -2259,6 +2300,8 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) ret = vfio_register_group_dev(&vdev->vdev); if (ret) goto out_power; + /* Disable TPH when taking over ownership of the device */ + pcie_disable_tph(pdev); return 0; out_power: -- 2.17.1 Add support to batch get CPU steering tags for device-specific TPH mode that does not implement an ST table. This interface requires enabling the 'enable_unsafe_tph_ds_mode' module parameter. 
Signed-off-by: Chengwen Feng --- drivers/vfio/pci/vfio_pci_core.c | 73 ++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 69f666d20c4a..45e641ab2a88 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1529,6 +1529,77 @@ static int vfio_pci_tph_disable(struct vfio_pci_core_device *vdev) return 0; } +static int vfio_pci_tph_get_st(struct vfio_pci_core_device *vdev, + struct vfio_device_pci_tph_op *op, + void __user *uarg) +{ + struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_tph_entry *ents; + struct vfio_pci_tph_st st; + enum tph_mem_type mtype; + size_t size, ents_off; + int i, err; + + if (!enable_unsafe_tph_ds_mode || + pcie_tph_get_st_table_loc(pdev) != PCI_TPH_LOC_NONE) + return -EOPNOTSUPP; + + if (copy_from_user(&st, uarg, sizeof(st))) + return -EFAULT; + + /* Check reserved fields are zero */ + if (memchr_inv(&st.reserved, 0, sizeof(st.reserved))) + return -EINVAL; + + if (!st.count || st.count > VFIO_PCI_TPH_MAX_ENTRIES) + return -EINVAL; + + size = st.count * sizeof(*ents); + if (op->argsz < offsetofend(struct vfio_device_pci_tph_op, st) + + sizeof(struct vfio_pci_tph_st) + size) + return -EINVAL; + + ents = kvmalloc(size, GFP_KERNEL); + if (!ents) + return -ENOMEM; + + ents_off = offsetof(struct vfio_pci_tph_st, ents); + if (copy_from_user(ents, uarg + ents_off, size)) { + err = -EFAULT; + goto out; + } + + for (i = 0; i < st.count; i++) { + /* Check reserved fields are zero */ + if (memchr_inv(&ents[i].reserved0, 0, sizeof(ents[i].reserved0)) || + memchr_inv(&ents[i].reserved1, 0, sizeof(ents[i].reserved1))) { + err = -EINVAL; + goto out; + } + + if (ents[i].mem_type == VFIO_PCI_TPH_MEM_TYPE_VM) { + mtype = TPH_MEM_TYPE_VM; + } else if (ents[i].mem_type == VFIO_PCI_TPH_MEM_TYPE_PM) { + mtype = TPH_MEM_TYPE_PM; + } else { + err = -EINVAL; + goto out; + } + + err = pcie_tph_get_cpu_st(pdev, mtype, ents[i].cpu, + 
&ents[i].st); + if (err) + goto out; + } + + if (copy_to_user(uarg + ents_off, ents, size)) + err = -EFAULT; + +out: + kvfree(ents); + return err; +} + static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev, void __user *uarg) { @@ -1545,6 +1616,8 @@ static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev, return vfio_pci_tph_enable(vdev, &op, uarg + minsz); case VFIO_PCI_TPH_DISABLE: return vfio_pci_tph_disable(vdev); + case VFIO_PCI_TPH_GET_ST: + return vfio_pci_tph_get_st(vdev, &op, uarg + minsz); default: /* Other ops are not implemented yet */ return -EINVAL; -- 2.17.1 Add VFIO_PCI_TPH_SET_ST operation to support batch programming of steering tag entries. If any entry fails, roll back successfully programmed entries to 0 to prevent inconsistent device state. Signed-off-by: Chengwen Feng --- drivers/vfio/pci/vfio_pci_core.c | 86 ++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 45e641ab2a88..867d8694b56b 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1600,6 +1600,90 @@ static int vfio_pci_tph_get_st(struct vfio_pci_core_device *vdev, return err; } +static int vfio_pci_tph_set_st(struct vfio_pci_core_device *vdev, + struct vfio_device_pci_tph_op *op, + void __user *uarg) +{ + struct pci_dev *pdev = vdev->pdev; + struct vfio_pci_tph_entry *ents; + struct vfio_pci_tph_st st; + enum tph_mem_type mtype; + size_t size, ents_off; + int i = 0, j, err; + u32 tab_loc; + u16 st_val; + + tab_loc = pcie_tph_get_st_table_loc(pdev); + if (tab_loc != PCI_TPH_LOC_CAP && tab_loc != PCI_TPH_LOC_MSIX) + return -EOPNOTSUPP; + + if (copy_from_user(&st, uarg, sizeof(st))) + return -EFAULT; + + if (!st.count || st.count > VFIO_PCI_TPH_MAX_ENTRIES) + return -EINVAL; + + /* Check reserved fields are zero */ + if (memchr_inv(&st.reserved, 0, sizeof(st.reserved))) + return -EINVAL; + + size = st.count * sizeof(*ents); + if 
(op->argsz < offsetofend(struct vfio_device_pci_tph_op, st) + + sizeof(struct vfio_pci_tph_st) + size) + return -EINVAL; + + ents = kvmalloc(size, GFP_KERNEL); + if (!ents) + return -ENOMEM; + + ents_off = offsetof(struct vfio_pci_tph_st, ents); + if (copy_from_user(ents, uarg + ents_off, size)) { + err = -EFAULT; + goto out; + } + + for (; i < st.count; i++) { + /* Check reserved fields are zero */ + if (memchr_inv(&ents[i].reserved0, 0, sizeof(ents[i].reserved0)) || + memchr_inv(&ents[i].reserved1, 0, sizeof(ents[i].reserved1))) { + err = -EINVAL; + goto out; + } + + if (ents[i].cpu == U32_MAX) { + err = pcie_tph_set_st_entry(pdev, ents[i].index, 0); + if (err) + goto out; + continue; + } + + if (ents[i].mem_type == VFIO_PCI_TPH_MEM_TYPE_VM) { + mtype = TPH_MEM_TYPE_VM; + } else if (ents[i].mem_type == VFIO_PCI_TPH_MEM_TYPE_PM) { + mtype = TPH_MEM_TYPE_PM; + } else { + err = -EINVAL; + goto out; + } + + err = pcie_tph_get_cpu_st(pdev, mtype, ents[i].cpu, &st_val); + if (err) + goto out; + err = pcie_tph_set_st_entry(pdev, ents[i].index, st_val); + if (err) + goto out; + } + +out: + if (err) { + /* Roll back previously programmed entries to 0 */ + for (j = 0; j < i; j++) + pcie_tph_set_st_entry(pdev, ents[j].index, 0); + } + kvfree(ents); + return err; +} + static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev, void __user *uarg) { @@ -1618,6 +1702,8 @@ static int vfio_pci_ioctl_tph(struct vfio_pci_core_device *vdev, return vfio_pci_tph_disable(vdev); case VFIO_PCI_TPH_GET_ST: return vfio_pci_tph_get_st(vdev, &op, uarg + minsz); + case VFIO_PCI_TPH_SET_ST: + return vfio_pci_tph_set_st(vdev, &op, uarg + minsz); default: /* Other ops are not implemented yet */ return -EINVAL; -- 2.17.1