From: Vlad Dumitrescu NICs are typically configured with total_vfs=0, forcing users to rely on external tools to enable SR-IOV (a widely used and essential feature). Add total_vfs parameter to devlink for SR-IOV max VF configurability. Enables standard kernel tools to manage SR-IOV, addressing the need for flexible VF configuration. Signed-off-by: Vlad Dumitrescu Tested-by: Kamal Heib Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- Documentation/networking/devlink/devlink-params.rst | 3 +++ include/net/devlink.h | 4 ++++ net/devlink/param.c | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst index 3da8f4ef2417..f2920371622c 100644 --- a/Documentation/networking/devlink/devlink-params.rst +++ b/Documentation/networking/devlink/devlink-params.rst @@ -140,3 +140,6 @@ own name. * - ``enable_phc`` - Boolean - Enable PHC (PTP Hardware Clock) functionality in the device. + * - ``total_vfs`` + - u32 + - The total number of Virtual Functions (VFs) supported by the PF. 
diff --git a/include/net/devlink.h b/include/net/devlink.h index d0ce5a7e984c..32508b4d5df3 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -523,6 +523,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, + DEVLINK_PARAM_GENERIC_ID_TOTAL_VFS, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -584,6 +585,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_ENABLE_PHC_NAME "enable_phc" #define DEVLINK_PARAM_GENERIC_ENABLE_PHC_TYPE DEVLINK_PARAM_TYPE_BOOL +#define DEVLINK_PARAM_GENERIC_TOTAL_VFS_NAME "total_vfs" +#define DEVLINK_PARAM_GENERIC_TOTAL_VFS_TYPE DEVLINK_PARAM_TYPE_U32 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/net/devlink/param.c b/net/devlink/param.c index 396b8a7f6013..3a5fe0a639ea 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -97,6 +97,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_ENABLE_PHC_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_PHC_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_TOTAL_VFS, + .name = DEVLINK_PARAM_GENERIC_TOTAL_VFS_NAME, + .type = DEVLINK_PARAM_GENERIC_TOTAL_VFS_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) -- 2.50.0 From: Saeed Mahameed Selects which algorithm should be used by the NIC in order to decide rate of CQE compression depending on PCIe bus conditions. Supported values: 1) balanced, merges fewer CQEs, resulting in a moderate compression ratio but maintaining a balance between bandwidth savings and performance 2) aggressive, merges more CQEs into a single entry, achieving a higher compression rate and maximizing performance, particularly under high traffic loads. 
Signed-off-by: Saeed Mahameed Reviewed-by: Jiri Pirko --- Documentation/networking/devlink/mlx5.rst | 9 + .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/devlink.c | 8 + .../net/ethernet/mellanox/mlx5/core/devlink.h | 1 + .../mellanox/mlx5/core/lib/nv_param.c | 245 ++++++++++++++++++ .../mellanox/mlx5/core/lib/nv_param.h | 14 + include/linux/mlx5/driver.h | 1 + 7 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 7febe0aecd53..417e5cdcd35d 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -117,6 +117,15 @@ parameters. - driverinit - Control the size (in packets) of the hairpin queues. + * - ``cqe_compress_type`` + - string + - permanent + - Configure which algorithm should be used by the NIC in order to decide + rate of CQE compression depending on PCIe bus conditions. 
+ + * ``balanced`` : Merges fewer CQEs, resulting in a moderate compression ratio but maintaining a balance between bandwidth savings and performance + * ``aggressive`` : Merges more CQEs into a single entry, achieving a higher compression rate and maximizing performance, particularly under high traffic loads + The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD`` Info versions diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d292e6a9e22c..26c824e13c45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ - fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o + fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o lib/nv_param.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3ffa3fbacd16..18347b44d611 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -10,6 +10,7 @@ #include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" +#include "lib/nv_param.h" static int mlx5_devlink_flash_update(struct devlink *devlink, struct devlink_flash_update_params *params, @@ -895,8 +896,14 @@ int mlx5_devlink_params_register(struct devlink *devlink) if (err) goto max_uc_list_err; + err = mlx5_nv_param_register_dl_params(devlink); + if (err) + goto nv_param_err; + return 0; +nv_param_err: + mlx5_devlink_max_uc_list_params_unregister(devlink); max_uc_list_err: mlx5_devlink_auxdev_params_unregister(devlink); auxdev_reg_err: @@ -907,6 +914,7 @@ int 
mlx5_devlink_params_register(struct devlink *devlink) void mlx5_devlink_params_unregister(struct devlink *devlink) { + mlx5_nv_param_unregister_dl_params(devlink); mlx5_devlink_max_uc_list_params_unregister(devlink); mlx5_devlink_auxdev_params_unregister(devlink); devl_params_unregister(devlink, mlx5_devlink_params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 961f75da6227..74bcdfa70361 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -22,6 +22,7 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, + MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE }; struct mlx5_trap_ctx { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c new file mode 100644 index 000000000000..20a39483be04 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include "nv_param.h" +#include "mlx5_core.h" + +enum { + MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CONFIG = 0x10a, +}; + +struct mlx5_ifc_configuration_item_type_class_global_bits { + u8 type_class[0x8]; + u8 parameter_index[0x18]; +}; + +union mlx5_ifc_config_item_type_auto_bits { + struct mlx5_ifc_configuration_item_type_class_global_bits + configuration_item_type_class_global; + u8 reserved_at_0[0x20]; +}; + +struct mlx5_ifc_config_item_bits { + u8 valid[0x2]; + u8 priority[0x2]; + u8 header_type[0x2]; + u8 ovr_en[0x1]; + u8 rd_en[0x1]; + u8 access_mode[0x2]; + u8 reserved_at_a[0x1]; + u8 writer_id[0x5]; + u8 version[0x4]; + u8 reserved_at_14[0x2]; + u8 host_id_valid[0x1]; + u8 length[0x9]; + + union mlx5_ifc_config_item_type_auto_bits type; + + u8 reserved_at_40[0x10]; + u8 crc16[0x10]; +}; + +struct mlx5_ifc_mnvda_reg_bits { + struct mlx5_ifc_config_item_bits configuration_item_header; + + u8 configuration_item_data[64][0x20]; +}; + +struct mlx5_ifc_nv_sw_offload_conf_bits { + u8 ip_over_vxlan_port[0x10]; + u8 tunnel_ecn_copy_offload_disable[0x1]; + u8 pci_atomic_mode[0x3]; + u8 sr_enable[0x1]; + u8 ptp_cyc2realtime[0x1]; + u8 vector_calc_disable[0x1]; + u8 uctx_en[0x1]; + u8 prio_tag_required_en[0x1]; + u8 esw_fdb_ipv4_ttl_modify_enable[0x1]; + u8 mkey_by_name[0x1]; + u8 ip_over_vxlan_en[0x1]; + u8 one_qp_per_recovery[0x1]; + u8 cqe_compression[0x3]; + u8 tunnel_udp_entropy_proto_disable[0x1]; + u8 reserved_at_21[0x1]; + u8 ar_enable[0x1]; + u8 log_max_outstanding_wqe[0x5]; + u8 vf_migration[0x2]; + u8 log_tx_psn_win[0x6]; + u8 lro_log_timeout3[0x4]; + u8 lro_log_timeout2[0x4]; + u8 lro_log_timeout1[0x4]; + u8 lro_log_timeout0[0x4]; +}; + +#define MNVDA_HDR_SZ \ + (MLX5_ST_SZ_BYTES(mnvda_reg) - \ + MLX5_BYTE_OFF(mnvda_reg, configuration_item_data)) + +#define MLX5_SET_CFG_ITEM_TYPE(_cls_name, _mnvda_ptr, _field, _val) \ + MLX5_SET(mnvda_reg, _mnvda_ptr, \ + configuration_item_header.type.configuration_item_type_class_##_cls_name._field, \ + _val) + +#define 
MLX5_SET_CFG_HDR_LEN(_mnvda_ptr, _cls_name) \ + MLX5_SET(mnvda_reg, _mnvda_ptr, configuration_item_header.length, \ + MLX5_ST_SZ_BYTES(_cls_name)) + +#define MLX5_GET_CFG_HDR_LEN(_mnvda_ptr) \ + MLX5_GET(mnvda_reg, _mnvda_ptr, configuration_item_header.length) + +static int mlx5_nv_param_read(struct mlx5_core_dev *dev, void *mnvda, + size_t len) +{ + u32 param_idx, type_class; + u32 header_len; + void *cls_ptr; + int err; + + if (WARN_ON(len > MLX5_ST_SZ_BYTES(mnvda_reg)) || len < MNVDA_HDR_SZ) + return -EINVAL; /* A caller bug */ + + err = mlx5_core_access_reg(dev, mnvda, len, mnvda, len, MLX5_REG_MNVDA, + 0, 0); + if (!err) + return 0; + + cls_ptr = MLX5_ADDR_OF(mnvda_reg, mnvda, + configuration_item_header.type.configuration_item_type_class_global); + + type_class = MLX5_GET(configuration_item_type_class_global, cls_ptr, + type_class); + param_idx = MLX5_GET(configuration_item_type_class_global, cls_ptr, + parameter_index); + header_len = MLX5_GET_CFG_HDR_LEN(mnvda); + + mlx5_core_warn(dev, "Failed to read mnvda reg: type_class 0x%x, param_idx 0x%x, header_len %u, err %d\n", + type_class, param_idx, header_len, err); + + return -EOPNOTSUPP; +} + +static int mlx5_nv_param_write(struct mlx5_core_dev *dev, void *mnvda, + size_t len) +{ + if (WARN_ON(len > MLX5_ST_SZ_BYTES(mnvda_reg)) || len < MNVDA_HDR_SZ) + return -EINVAL; + + if (WARN_ON(MLX5_GET_CFG_HDR_LEN(mnvda) == 0)) + return -EINVAL; + + return mlx5_core_access_reg(dev, mnvda, len, mnvda, len, MLX5_REG_MNVDA, + 0, 1); +} + +static int +mlx5_nv_param_read_sw_offload_conf(struct mlx5_core_dev *dev, void *mnvda, + size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0); + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index, + MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CONFIG); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_sw_offload_conf); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static const char *const + cqe_compress_str[] = { "balanced", "aggressive" }; + +static int 
+mlx5_nv_param_devlink_cqe_compress_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + u8 value = U8_MAX; + void *data; + int err; + + err = mlx5_nv_param_read_sw_offload_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + value = MLX5_GET(nv_sw_offload_conf, data, cqe_compression); + + if (value >= ARRAY_SIZE(cqe_compress_str)) + return -EOPNOTSUPP; + + strscpy(ctx->val.vstr, cqe_compress_str[value], sizeof(ctx->val.vstr)); + return 0; +} + +static int +mlx5_nv_param_devlink_cqe_compress_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cqe_compress_str); i++) { + if (!strcmp(val.vstr, cqe_compress_str[i])) + return 0; + } + + NL_SET_ERR_MSG_MOD(extack, + "Invalid value, supported values are balanced/aggressive"); + return -EOPNOTSUPP; +} + +static int +mlx5_nv_param_devlink_cqe_compress_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + int err = 0; + void *data; + u8 value; + + if (!strcmp(ctx->val.vstr, "aggressive")) + value = 1; + else /* balanced: can't be anything else already validated above */ + value = 0; + + err = mlx5_nv_param_read_sw_offload_conf(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to read sw_offload_conf mnvda reg"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + MLX5_SET(nv_sw_offload_conf, data, cqe_compression, value); + + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); +} + +static const struct devlink_param mlx5_nv_param_devlink_params[] = { + 
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE, + "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_PERMANENT), + mlx5_nv_param_devlink_cqe_compress_get, + mlx5_nv_param_devlink_cqe_compress_set, + mlx5_nv_param_devlink_cqe_compress_validate), +}; + +int mlx5_nv_param_register_dl_params(struct devlink *devlink) +{ + if (!mlx5_core_is_pf(devlink_priv(devlink))) + return 0; + + return devl_params_register(devlink, mlx5_nv_param_devlink_params, + ARRAY_SIZE(mlx5_nv_param_devlink_params)); +} + +void mlx5_nv_param_unregister_dl_params(struct devlink *devlink) +{ + if (!mlx5_core_is_pf(devlink_priv(devlink))) + return; + + devl_params_unregister(devlink, mlx5_nv_param_devlink_params, + ARRAY_SIZE(mlx5_nv_param_devlink_params)); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h new file mode 100644 index 000000000000..9f4922ff7745 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_NV_PARAM_H +#define __MLX5_NV_PARAM_H + +#include +#include "devlink.h" + +int mlx5_nv_param_register_dl_params(struct devlink *devlink); +void mlx5_nv_param_unregister_dl_params(struct devlink *devlink); + +#endif + diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e6ba8f4f4bd1..96ce152e739f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -135,6 +135,7 @@ enum { MLX5_REG_MTCAP = 0x9009, MLX5_REG_MTMP = 0x900A, MLX5_REG_MCIA = 0x9014, + MLX5_REG_MNVDA = 0x9024, MLX5_REG_MFRL = 0x9028, MLX5_REG_MLCR = 0x902b, MLX5_REG_MRTC = 0x902d, -- 2.50.0 From: Vlad Dumitrescu Example usage: devlink dev param set pci/0000:01:00.0 name enable_sriov value {true, false} cmode permanent devlink dev reload pci/0000:01:00.0 action fw_activate echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove echo 1 >/sys/bus/pci/rescan grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_* Signed-off-by: Vlad Dumitrescu Tested-by: Kamal Heib Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- Documentation/networking/devlink/mlx5.rst | 14 +- .../mellanox/mlx5/core/lib/nv_param.c | 199 ++++++++++++++++++ 2 files changed, 210 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 417e5cdcd35d..587e0200c1cd 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -15,23 +15,31 @@ Parameters * - Name - Mode - Validation + - Notes * - ``enable_roce`` - driverinit - - Type: Boolean - - If the device supports RoCE disablement, RoCE enablement state controls + - Boolean + - If the device supports RoCE disablement, RoCE enablement state controls device support for RoCE capability. Otherwise, the control occurs in the driver stack. When RoCE is disabled at the driver level, only raw ethernet QPs are supported. * - ``io_eq_size`` - driverinit - The range is between 64 and 4096. 
+ - * - ``event_eq_size`` - driverinit - The range is between 64 and 4096. + - * - ``max_macs`` - driverinit - The range is between 1 and 2^31. Only power of 2 values are supported. + - + * - ``enable_sriov`` + - permanent + - Boolean + - Applies to each physical function (PF) independently, if the device + supports it. Otherwise, it applies symmetrically to all PFs. The ``mlx5`` driver also implements the following driver-specific parameters. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c index 20a39483be04..ed2129843ec7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c @@ -5,7 +5,11 @@ #include "mlx5_core.h" enum { + MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CONF = 0x80, + MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CAP = 0x81, MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CONFIG = 0x10a, + + MLX5_CLASS_3_CTRL_ID_NV_PF_PCI_CONF = 0x80, }; struct mlx5_ifc_configuration_item_type_class_global_bits { @@ -13,9 +17,18 @@ struct mlx5_ifc_configuration_item_type_class_global_bits { u8 parameter_index[0x18]; }; +struct mlx5_ifc_configuration_item_type_class_per_host_pf_bits { + u8 type_class[0x8]; + u8 pf_index[0x6]; + u8 pci_bus_index[0x8]; + u8 parameter_index[0xa]; +}; + union mlx5_ifc_config_item_type_auto_bits { struct mlx5_ifc_configuration_item_type_class_global_bits configuration_item_type_class_global; + struct mlx5_ifc_configuration_item_type_class_per_host_pf_bits + configuration_item_type_class_per_host_pf; u8 reserved_at_0[0x20]; }; @@ -45,6 +58,45 @@ struct mlx5_ifc_mnvda_reg_bits { u8 configuration_item_data[64][0x20]; }; +struct mlx5_ifc_nv_global_pci_conf_bits { + u8 sriov_valid[0x1]; + u8 reserved_at_1[0x10]; + u8 per_pf_total_vf[0x1]; + u8 reserved_at_12[0xe]; + + u8 sriov_en[0x1]; + u8 reserved_at_21[0xf]; + u8 total_vfs[0x10]; + + u8 reserved_at_40[0x20]; +}; + +struct mlx5_ifc_nv_global_pci_cap_bits { + u8 
max_vfs_per_pf_valid[0x1]; + u8 reserved_at_1[0x13]; + u8 per_pf_total_vf_supported[0x1]; + u8 reserved_at_15[0xb]; + + u8 sriov_support[0x1]; + u8 reserved_at_21[0xf]; + u8 max_vfs_per_pf[0x10]; + + u8 reserved_at_40[0x60]; +}; + +struct mlx5_ifc_nv_pf_pci_conf_bits { + u8 reserved_at_0[0x9]; + u8 pf_total_vf_en[0x1]; + u8 reserved_at_a[0x16]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x10]; + u8 total_vf[0x10]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_nv_sw_offload_conf_bits { u8 ip_over_vxlan_port[0x10]; u8 tunnel_ecn_copy_offload_disable[0x1]; @@ -216,7 +268,154 @@ mlx5_nv_param_devlink_cqe_compress_set(struct devlink *devlink, u32 id, return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); } +static int mlx5_nv_param_read_global_pci_conf(struct mlx5_core_dev *dev, + void *mnvda, size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0); + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index, + MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CONF); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_global_pci_conf); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static int mlx5_nv_param_read_global_pci_cap(struct mlx5_core_dev *dev, + void *mnvda, size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0); + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index, + MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CAP); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_global_pci_cap); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static int mlx5_nv_param_read_per_host_pf_conf(struct mlx5_core_dev *dev, + void *mnvda, size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(per_host_pf, mnvda, type_class, 3); + MLX5_SET_CFG_ITEM_TYPE(per_host_pf, mnvda, parameter_index, + MLX5_CLASS_3_CTRL_ID_NV_PF_PCI_CONF); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_pf_pci_conf); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static int mlx5_devlink_enable_sriov_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 
mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + bool sriov_en = false; + void *data; + int err; + + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) { + ctx->val.vbool = false; + return 0; + } + + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + sriov_en = MLX5_GET(nv_global_pci_conf, data, sriov_en); + if (!MLX5_GET(nv_global_pci_conf, data, per_pf_total_vf)) { + ctx->val.vbool = sriov_en; + return 0; + } + + /* SRIOV is per PF */ + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + ctx->val.vbool = sriov_en && + MLX5_GET(nv_pf_pci_conf, data, pf_total_vf_en); + return 0; +} + +static int mlx5_devlink_enable_sriov_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + bool per_pf_support; + void *cap, *data; + int err; + + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to read global PCI capability"); + return err; + } + + cap = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + per_pf_support = MLX5_GET(nv_global_pci_cap, cap, + per_pf_total_vf_supported); + + if (!MLX5_GET(nv_global_pci_cap, cap, sriov_support)) { + NL_SET_ERR_MSG_MOD(extack, + "SRIOV is not supported on this device"); + return -EOPNOTSUPP; + } + + if (!per_pf_support) { + /* We don't allow global SRIOV setting on per PF devlink */ + NL_SET_ERR_MSG_MOD(extack, + "SRIOV is not per PF on this device"); + return 
-EOPNOTSUPP; + } + + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to read global PCI configuration"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + + /* setup per PF sriov mode */ + MLX5_SET(nv_global_pci_conf, data, sriov_valid, 1); + MLX5_SET(nv_global_pci_conf, data, sriov_en, 1); + MLX5_SET(nv_global_pci_conf, data, per_pf_total_vf, 1); + + err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to write global PCI configuration"); + return err; + } + + /* enable/disable sriov on this PF */ + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to read per host PF configuration"); + return err; + } + MLX5_SET(nv_pf_pci_conf, data, pf_total_vf_en, ctx->val.vbool); + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); +} + static const struct devlink_param mlx5_nv_param_devlink_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT), + mlx5_devlink_enable_sriov_get, + mlx5_devlink_enable_sriov_set, NULL), DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE, "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING, BIT(DEVLINK_PARAM_CMODE_PERMANENT), -- 2.50.0 From: Vlad Dumitrescu Some devices support both symmetric (same value for all PFs) and asymmetric, while others only support symmetric configuration. This implementation prefers asymmetric, since it is closer to the devlink model (per function settings), but falls back to symmetric when needed. 
Example usage: devlink dev param set pci/0000:01:00.0 name total_vfs value <num_vfs> cmode permanent devlink dev reload pci/0000:01:00.0 action fw_activate echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove echo 1 >/sys/bus/pci/rescan cat /sys/bus/pci/devices/0000:01:00.0/sriov_totalvfs Signed-off-by: Vlad Dumitrescu Reviewed-by: Jiri Pirko Tested-by: Kamal Heib Signed-off-by: Saeed Mahameed --- Documentation/networking/devlink/mlx5.rst | 22 +++ .../mellanox/mlx5/core/lib/nv_param.c | 132 ++++++++++++++++++ 2 files changed, 154 insertions(+) diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 587e0200c1cd..00a43324dec2 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -40,6 +40,28 @@ Parameters - Boolean - Applies to each physical function (PF) independently, if the device supports it. Otherwise, it applies symmetrically to all PFs. + * - ``total_vfs`` + - permanent + - The range is between 1 and a device-specific max. + - Applies to each physical function (PF) independently, if the device + supports it. Otherwise, it applies symmetrically to all PFs. + +Note: permanent parameters such as ``enable_sriov`` and ``total_vfs`` require FW reset to take effect + +.. code-block:: bash + + # setup parameters + devlink dev param set pci/0000:01:00.0 name enable_sriov value true cmode permanent + devlink dev param set pci/0000:01:00.0 name total_vfs value 8 cmode permanent + + # Fw reset + devlink dev reload pci/0000:01:00.0 action fw_activate + + # for PCI related config such as sriov PCI reset/rescan is required: + echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove + echo 1 >/sys/bus/pci/rescan + grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_* + The ``mlx5`` driver also implements the following driver-specific parameters. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c index ed2129843ec7..383d8cfe4c0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c @@ -412,10 +412,142 @@ static int mlx5_devlink_enable_sriov_set(struct devlink *devlink, u32 id, return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); } +static int mlx5_devlink_total_vfs_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + void *data; + int err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) { + ctx->val.vu32 = 0; + return 0; + } + + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + if (!MLX5_GET(nv_global_pci_conf, data, per_pf_total_vf)) { + ctx->val.vu32 = MLX5_GET(nv_global_pci_conf, data, total_vfs); + return 0; + } + + /* SRIOV is per PF */ + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + ctx->val.vu32 = MLX5_GET(nv_pf_pci_conf, data, total_vf); + + return 0; +} + +static int mlx5_devlink_total_vfs_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; /* zeroed: the whole buffer is passed to the register access */ + bool per_pf_support; + void *data; + int err; + + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to read global pci cap"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + if 
(!MLX5_GET(nv_global_pci_cap, data, sriov_support)) { + NL_SET_ERR_MSG_MOD(extack, "Not configurable on this device"); + return -EOPNOTSUPP; + } + + per_pf_support = MLX5_GET(nv_global_pci_cap, data, + per_pf_total_vf_supported); + if (!per_pf_support) { + /* We don't allow global SRIOV setting on per PF devlink */ + NL_SET_ERR_MSG_MOD(extack, + "SRIOV is not per PF on this device"); + return -EOPNOTSUPP; + } + + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + MLX5_SET(nv_global_pci_conf, data, sriov_valid, 1); + MLX5_SET(nv_global_pci_conf, data, per_pf_total_vf, per_pf_support); + + if (!per_pf_support) { + MLX5_SET(nv_global_pci_conf, data, total_vfs, ctx->val.vu32); + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); + } + + /* SRIOV is per PF */ + err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + MLX5_SET(nv_pf_pci_conf, data, total_vf, ctx->val.vu32); + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); +} + +static int mlx5_devlink_total_vfs_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 cap[MLX5_ST_SZ_DW(mnvda_reg)] = {}; /* zeroed: the whole buffer is passed to the register access */ + void *data; + u16 max; + int err; + + data = MLX5_ADDR_OF(mnvda_reg, cap, configuration_item_data); + + err = mlx5_nv_param_read_global_pci_cap(dev, cap, sizeof(cap)); + if (err) + return err; + + if (!MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf_valid)) + return 0; /* optimistic, but set might fail later */ + + max = MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf); + if (val.vu32 > max) { /* total_vfs is a u32 param: compare the full value, not a truncated u16 view */ + NL_SET_ERR_MSG_FMT_MOD(extack, + "Max allowed by device is %u", max); + return -EINVAL; + } + + return 
0; +} + static const struct devlink_param mlx5_nv_param_devlink_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT), mlx5_devlink_enable_sriov_get, mlx5_devlink_enable_sriov_set, NULL), + DEVLINK_PARAM_GENERIC(TOTAL_VFS, BIT(DEVLINK_PARAM_CMODE_PERMANENT), + mlx5_devlink_total_vfs_get, + mlx5_devlink_total_vfs_set, + mlx5_devlink_total_vfs_validate), DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE, "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING, BIT(DEVLINK_PARAM_CMODE_PERMANENT), -- 2.50.0 From: Jiri Pirko As the follow-up patch will need to get struct devlink_port *, avoid unnecessary lookup and instead of port_index pass the struct devlink_port * directly. Signed-off-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- net/devlink/param.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/net/devlink/param.c b/net/devlink/param.c index 3a5fe0a639ea..fcb59763530a 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -227,7 +227,7 @@ devlink_nl_param_value_fill_one(struct sk_buff *msg, } static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, - unsigned int port_index, + struct devlink_port *devlink_port, struct devlink_param_item *param_item, enum devlink_command cmd, u32 portid, u32 seq, int flags) @@ -273,7 +273,8 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, if (cmd == DEVLINK_CMD_PORT_PARAM_GET || cmd == DEVLINK_CMD_PORT_PARAM_NEW || cmd == DEVLINK_CMD_PORT_PARAM_DEL) - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index)) + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, + devlink_port->index)) goto genlmsg_cancel; param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM); @@ -315,7 +316,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, } static void devlink_param_notify(struct devlink *devlink, - unsigned int port_index, + struct devlink_port *devlink_port, struct 
devlink_param_item *param_item, enum devlink_command cmd) { @@ -336,7 +337,7 @@ static void devlink_param_notify(struct devlink *devlink, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd, + err = devlink_nl_param_fill(msg, devlink, devlink_port, param_item, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); @@ -353,7 +354,7 @@ static void devlink_params_notify(struct devlink *devlink, unsigned long param_id; xa_for_each(&devlink->params, param_id, param_item) - devlink_param_notify(devlink, 0, param_item, cmd); + devlink_param_notify(devlink, NULL, param_item, cmd); } void devlink_params_notify_register(struct devlink *devlink) @@ -377,7 +378,7 @@ static int devlink_nl_param_get_dump_one(struct sk_buff *msg, int err = 0; xa_for_each_start(&devlink->params, param_id, param_item, state->idx) { - err = devlink_nl_param_fill(msg, devlink, 0, param_item, + err = devlink_nl_param_fill(msg, devlink, NULL, param_item, DEVLINK_CMD_PARAM_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); @@ -483,7 +484,7 @@ int devlink_nl_param_get_doit(struct sk_buff *skb, if (!msg) return -ENOMEM; - err = devlink_nl_param_fill(msg, devlink, 0, param_item, + err = devlink_nl_param_fill(msg, devlink, NULL, param_item, DEVLINK_CMD_PARAM_GET, info->snd_portid, info->snd_seq, 0); if (err) { @@ -495,7 +496,7 @@ int devlink_nl_param_get_doit(struct sk_buff *skb, } static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, - unsigned int port_index, + struct devlink_port *devlink_port, struct xarray *params, struct genl_info *info, enum devlink_command cmd) @@ -545,7 +546,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, return err; } - devlink_param_notify(devlink, port_index, param_item, cmd); + devlink_param_notify(devlink, devlink_port, param_item, cmd); return 0; } @@ -553,7 +554,7 @@ int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info) { struct 
devlink *devlink = info->user_ptr[0]; - return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params, + return __devlink_nl_cmd_param_set_doit(devlink, NULL, &devlink->params, info, DEVLINK_CMD_PARAM_NEW); } @@ -612,7 +613,7 @@ static int devlink_param_register(struct devlink *devlink, if (err) goto err_xa_insert; - devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); + devlink_param_notify(devlink, NULL, param_item, DEVLINK_CMD_PARAM_NEW); return 0; err_xa_insert: @@ -628,7 +629,7 @@ static void devlink_param_unregister(struct devlink *devlink, param_item = devlink_param_find_by_id(&devlink->params, param->id); if (WARN_ON(!param_item)) return; - devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL); + devlink_param_notify(devlink, NULL, param_item, DEVLINK_CMD_PARAM_DEL); xa_erase(&devlink->params, param->id); kfree(param_item); } @@ -789,7 +790,7 @@ void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id, param_item->driverinit_value = init_val; param_item->driverinit_value_valid = true; - devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); + devlink_param_notify(devlink, NULL, param_item, DEVLINK_CMD_PARAM_NEW); } EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set); @@ -828,6 +829,6 @@ void devl_param_value_changed(struct devlink *devlink, u32 param_id) param_item = devlink_param_find_by_id(&devlink->params, param_id); WARN_ON(!param_item); - devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); + devlink_param_notify(devlink, NULL, param_item, DEVLINK_CMD_PARAM_NEW); } EXPORT_SYMBOL_GPL(devl_param_value_changed); -- 2.50.0 From: Saeed Mahameed Port params infrastructure is incomplete and needs a bit of plumbing to support port params commands from netlink. Introduce port params registration API, very similar to current devlink params API, add the params xarray to devlink_port structure and decouple devlink params registration routines from the devlink structure. 
Signed-off-by: Saeed Mahameed Reviewed-by: Jiri Pirko --- include/net/devlink.h | 14 ++++ net/devlink/param.c | 151 ++++++++++++++++++++++++++++++++++-------- net/devlink/port.c | 3 + 3 files changed, 141 insertions(+), 27 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 32508b4d5df3..ffb47f62d575 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -127,6 +127,7 @@ struct devlink_port { struct list_head region_list; struct devlink *devlink; const struct devlink_port_ops *ops; + struct xarray params; unsigned int index; spinlock_t type_lock; /* Protects type and type_eth/ib * structures consistency. @@ -1835,6 +1836,19 @@ void devl_params_unregister(struct devlink *devlink, void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); +int devl_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count); +int devlink_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count); +void devl_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count); +void devlink_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count); + int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *val); void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id, diff --git a/net/devlink/param.c b/net/devlink/param.c index fcb59763530a..3103091c2da7 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -590,13 +590,16 @@ static int devlink_param_verify(const struct devlink_param *param) } static int devlink_param_register(struct devlink *devlink, + struct devlink_port *devlink_port, + struct xarray *params_arr, const struct devlink_param *param) { struct devlink_param_item *param_item; + enum 
devlink_command cmd; int err; WARN_ON(devlink_param_verify(param)); - WARN_ON(devlink_param_find_by_name(&devlink->params, param->name)); + WARN_ON(devlink_param_find_by_name(params_arr, param->name)); if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT)) WARN_ON(param->get || param->set); @@ -609,11 +612,13 @@ static int devlink_param_register(struct devlink *devlink, param_item->param = param; - err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL); + err = xa_insert(params_arr, param->id, param_item, GFP_KERNEL); if (err) goto err_xa_insert; - devlink_param_notify(devlink, NULL, param_item, DEVLINK_CMD_PARAM_NEW); + cmd = devlink_port ? DEVLINK_CMD_PORT_PARAM_NEW : DEVLINK_CMD_PARAM_NEW; + devlink_param_notify(devlink, devlink_port, param_item, cmd); + return 0; err_xa_insert: @@ -622,30 +627,28 @@ static int devlink_param_register(struct devlink *devlink, } static void devlink_param_unregister(struct devlink *devlink, + struct devlink_port *devlink_port, + struct xarray *params_arr, const struct devlink_param *param) { struct devlink_param_item *param_item; + enum devlink_command cmd; - param_item = devlink_param_find_by_id(&devlink->params, param->id); + param_item = devlink_param_find_by_id(params_arr, param->id); if (WARN_ON(!param_item)) return; - devlink_param_notify(devlink, NULL, param_item, DEVLINK_CMD_PARAM_DEL); - xa_erase(&devlink->params, param->id); + + cmd = devlink_port ? DEVLINK_CMD_PORT_PARAM_DEL : DEVLINK_CMD_PARAM_DEL; + devlink_param_notify(devlink, devlink_port, param_item, cmd); + xa_erase(params_arr, param->id); kfree(param_item); } -/** - * devl_params_register - register configuration parameters - * - * @devlink: devlink - * @params: configuration parameters array - * @params_count: number of parameters provided - * - * Register the configuration parameters supported by the driver. 
- */ -int devl_params_register(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) +static int __devlink_params_register(struct devlink *devlink, + struct devlink_port *devlink_port, + struct xarray *params_arr, + const struct devlink_param *params, + size_t params_count) { const struct devlink_param *param = params; int i, err; @@ -653,10 +656,12 @@ int devl_params_register(struct devlink *devlink, lockdep_assert_held(&devlink->lock); for (i = 0; i < params_count; i++, param++) { - err = devlink_param_register(devlink, param); + err = devlink_param_register(devlink, devlink_port, params_arr, + param); if (err) goto rollback; } + return 0; rollback: @@ -664,9 +669,28 @@ int devl_params_register(struct devlink *devlink, return err; for (param--; i > 0; i--, param--) - devlink_param_unregister(devlink, param); + devlink_param_unregister(devlink, devlink_port, params_arr, + param); + return err; } + +/** + * devl_params_register - register configuration parameters + * + * @devlink: devlink + * @params: configuration parameters array + * @params_count: number of parameters provided + * + * Register the configuration parameters supported by the driver. 
+ */ +int devl_params_register(struct devlink *devlink, + const struct devlink_param *params, + size_t params_count) +{ + return __devlink_params_register(devlink, NULL, &devlink->params, + params, params_count); +} EXPORT_SYMBOL_GPL(devl_params_register); int devlink_params_register(struct devlink *devlink, @@ -682,6 +706,22 @@ int devlink_params_register(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_params_register); +static void __devlink_params_unregister(struct devlink *devlink, + struct devlink_port *devlink_port, + struct xarray *params_arr, + const struct devlink_param *params, + size_t params_count) +{ + const struct devlink_param *param = params; + int i; + + lockdep_assert_held(&devlink->lock); + + for (i = 0; i < params_count; i++, param++) + devlink_param_unregister(devlink, devlink_port, params_arr, + param); +} + /** * devl_params_unregister - unregister configuration parameters * @devlink: devlink @@ -692,13 +732,8 @@ void devl_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count) { - const struct devlink_param *param = params; - int i; - - lockdep_assert_held(&devlink->lock); - - for (i = 0; i < params_count; i++, param++) - devlink_param_unregister(devlink, param); + __devlink_params_unregister(devlink, NULL, &devlink->params, + params, params_count); } EXPORT_SYMBOL_GPL(devl_params_unregister); @@ -712,6 +747,68 @@ void devlink_params_unregister(struct devlink *devlink, } EXPORT_SYMBOL_GPL(devlink_params_unregister); +/** + * devl_port_params_register - register configuration parameters for port + * + * @devlink_port: devlink port + * @params: configuration parameters array + * @params_count: number of parameters provided + * + * Register the configuration parameters supported by the driver for the + * specific port. 
+ */ +int devl_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + return __devlink_params_register(devlink_port->devlink, + devlink_port, + &devlink_port->params, + params, params_count); +} +EXPORT_SYMBOL_GPL(devl_port_params_register); + +/** + * devl_port_params_unregister - unregister configuration parameters for + * devlink port + * + * @devlink_port: devlink port + * @params: configuration parameters to unregister + * @params_count: number of parameters provided + */ +void devl_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + __devlink_params_unregister(devlink_port->devlink, devlink_port, + &devlink_port->params, + params, params_count); +} +EXPORT_SYMBOL_GPL(devl_port_params_unregister); + +int devlink_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + int err; + + devl_lock(devlink_port->devlink); + err = devl_port_params_register(devlink_port, params, params_count); + devl_unlock(devlink_port->devlink); + return err; +} +EXPORT_SYMBOL_GPL(devlink_port_params_register); + +void devlink_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + devl_lock(devlink_port->devlink); + devl_port_params_unregister(devlink_port, params, params_count); + devl_unlock(devlink_port->devlink); +} +EXPORT_SYMBOL_GPL(devlink_port_params_unregister); + /** * devl_param_driverinit_value_get - get configuration parameter * value for driver initializing diff --git a/net/devlink/port.c b/net/devlink/port.c index 939081a0e615..39bba3f7a1f9 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -1075,6 +1075,8 @@ int devl_port_register_with_ops(struct devlink *devlink, devlink_port->registered = true; devlink_port->index = port_index; devlink_port->ops = ops ? 
ops : &devlink_port_dummy_ops; + xa_init_flags(&devlink_port->params, XA_FLAGS_ALLOC); + spin_lock_init(&devlink_port->type_lock); INIT_LIST_HEAD(&devlink_port->reporter_list); err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL); @@ -1134,6 +1136,7 @@ void devl_port_unregister(struct devlink_port *devlink_port) devlink_port_type_warn_cancel(devlink_port); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); xa_erase(&devlink_port->devlink->ports, devlink_port->index); + xa_destroy(&devlink_port->params); WARN_ON(!list_empty(&devlink_port->reporter_list)); devlink_port->registered = false; } -- 2.50.0 From: Saeed Mahameed Add missing port-params netlink attributes and policies to devlink's spec, reuse existing get_doit/dump_doit of the devlink params for port params and implement the dump command for all devlink ports params. This implements: 1) devlink port param show 2) devlink port param show DEV/PORT_INDEX 3) devlink port param show DEV/PORT_INDEX name PARAMETER Signed-off-by: Saeed Mahameed Reviewed-by: Jiri Pirko --- Documentation/netlink/specs/devlink.yaml | 13 ++-- include/net/devlink.h | 1 + net/devlink/netlink_gen.c | 16 ++++- net/devlink/param.c | 89 ++++++++++++++++++++---- 4 files changed, 100 insertions(+), 19 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 1c4bb0cbe5f0..9e1cb4cc7fe1 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -1880,12 +1880,17 @@ operations: pre: devlink-nl-pre-doit-port post: devlink-nl-post-doit request: - attributes: *port-id-attrs - reply: - attributes: *port-id-attrs + attributes: &port-param-id-attrs + - bus-name + - dev-name + - port-index + - param-name + reply: &port-param-get-reply + attributes: *port-param-id-attrs dump: - reply: + request: attributes: *port-id-attrs + reply: *port-param-get-reply - name: port-param-set diff --git a/include/net/devlink.h b/include/net/devlink.h index ffb47f62d575..b2517813ce17
100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -441,6 +441,7 @@ union devlink_param_value { struct devlink_param_gset_ctx { union devlink_param_value val; enum devlink_param_cmode cmode; + struct devlink_port *devlink_port; }; /** diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index c50436433c18..010a7f216388 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -354,7 +354,15 @@ static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_REGION }; /* DEVLINK_CMD_PORT_PARAM_GET - do */ -static const struct nla_policy devlink_port_param_get_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_param_get_do_nl_policy[DEVLINK_ATTR_PARAM_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_PORT_PARAM_GET - dump */ +static const struct nla_policy devlink_port_param_get_dump_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, @@ -972,14 +980,16 @@ const struct genl_split_ops devlink_nl_ops[74] = { .pre_doit = devlink_nl_pre_doit_port, .doit = devlink_nl_port_param_get_doit, .post_doit = devlink_nl_post_doit, - .policy = devlink_port_param_get_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .policy = devlink_port_param_get_do_nl_policy, + .maxattr = DEVLINK_ATTR_PARAM_NAME, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_PORT_PARAM_GET, .validate = GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_port_param_get_dumpit, + .policy = devlink_port_param_get_dump_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { diff --git a/net/devlink/param.c b/net/devlink/param.c index 
3103091c2da7..9be343a0ffd3 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -158,11 +158,14 @@ devlink_param_cmode_is_supported(const struct devlink_param *param, } static int devlink_param_get(struct devlink *devlink, + struct devlink_port *devlink_port, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx) { if (!param->get) return -EOPNOTSUPP; + + ctx->devlink_port = devlink_port; return param->get(devlink, param->id, ctx); } @@ -235,7 +238,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1]; bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {}; const struct devlink_param *param = param_item->param; - struct devlink_param_gset_ctx ctx; + struct devlink_param_gset_ctx ctx = {}; struct nlattr *param_values_list; struct nlattr *param_attr; void *hdr; @@ -255,7 +258,8 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, return -EOPNOTSUPP; } else { ctx.cmode = i; - err = devlink_param_get(devlink, param, &ctx); + err = devlink_param_get(devlink, devlink_port, param, + &ctx); if (err) return err; param_value[i] = ctx.val; @@ -468,15 +472,17 @@ devlink_param_get_from_info(struct xarray *params, struct genl_info *info) return devlink_param_find_by_name(params, param_name); } -int devlink_nl_param_get_doit(struct sk_buff *skb, - struct genl_info *info) +static int __devlink_nl_param_get_doit(struct devlink *devlink, + struct devlink_port *devlink_port, + struct xarray *params, + struct genl_info *info, + enum devlink_command cmd) { - struct devlink *devlink = info->user_ptr[0]; struct devlink_param_item *param_item; struct sk_buff *msg; int err; - param_item = devlink_param_get_from_info(&devlink->params, info); + param_item = devlink_param_get_from_info(params, info); if (!param_item) return -EINVAL; @@ -484,8 +490,7 @@ int devlink_nl_param_get_doit(struct sk_buff *skb, if (!msg) return -ENOMEM; - err = 
devlink_nl_param_fill(msg, devlink, NULL, param_item, - DEVLINK_CMD_PARAM_GET, + err = devlink_nl_param_fill(msg, devlink, devlink_port, param_item, cmd, info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); @@ -495,6 +500,14 @@ int devlink_nl_param_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } +int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + + return __devlink_nl_param_get_doit(devlink, NULL, &devlink->params, + info, DEVLINK_CMD_PARAM_GET); +} + static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, struct devlink_port *devlink_port, struct xarray *params, @@ -558,18 +571,70 @@ int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info) info, DEVLINK_CMD_PARAM_NEW); } +static int +devlink_nl_port_param_get_dump_one(struct sk_buff *msg, + struct devlink *devlink, + struct netlink_callback *cb, + int flags) +{ + struct devlink_nl_dump_state *state = devlink_dump_state(cb); + const struct genl_info *info = genl_info_dump(cb); + unsigned long port_index_end = ULONG_MAX; + struct devlink_param_item *param_item; + struct nlattr **attrs = info->attrs; + unsigned long port_index_start = 0; + struct devlink_port *devlink_port; + unsigned long port_index; + unsigned long param_id; + int idx = 0; + int err = 0; + + if (attrs && attrs[DEVLINK_ATTR_PORT_INDEX]) { + port_index_start = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); + port_index_end = port_index_start; + flags |= NLM_F_DUMP_FILTERED; + } + + xa_for_each_range(&devlink->ports, port_index, devlink_port, + port_index_start, port_index_end) { + xa_for_each_start(&devlink_port->params, param_id, param_item, + state->idx) { + if (idx < state->idx) { + idx++; + continue; + } + err = devlink_nl_param_fill(msg, devlink, devlink_port, + param_item, + DEVLINK_CMD_PORT_PARAM_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, flags); + if (err == -EOPNOTSUPP) { + err = 0; + } else if 
(err) { + state->idx = param_id; + break; + } + } + } + + return err; +} + int devlink_nl_port_param_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { - NL_SET_ERR_MSG(cb->extack, "Port params are not supported"); - return msg->len; + return devlink_nl_dumpit(msg, cb, devlink_nl_port_param_get_dump_one); } int devlink_nl_port_param_get_doit(struct sk_buff *skb, struct genl_info *info) { - NL_SET_ERR_MSG(info->extack, "Port params are not supported"); - return -EINVAL; + struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink *devlink = info->user_ptr[0]; + + return __devlink_nl_param_get_doit(devlink, devlink_port, + &devlink_port->params, + info, DEVLINK_CMD_PORT_PARAM_GET); } int devlink_nl_port_param_set_doit(struct sk_buff *skb, -- 2.50.0 From: Saeed Mahameed Add missing port-params netlink attributes and policies to devlink's spec, reuse existing set_doit of the devlink dev params. This implements: devlink port param set DEV/PORT_INDEX name PARAMETER value VALUE \ cmode { runtime | driverinit | permanent } Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- Documentation/netlink/specs/devlink.yaml | 9 ++++++++- net/devlink/netlink_gen.c | 7 +++++-- net/devlink/param.c | 16 ++++++++++++---- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 9e1cb4cc7fe1..606070ae75f5 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -1902,7 +1902,14 @@ operations: pre: devlink-nl-pre-doit-port post: devlink-nl-post-doit request: - attributes: *port-id-attrs + attributes: + - bus-name + - dev-name + - port-index + - param-name + - param-type + # param-value-data is missing here as the type is variable + - param-value-cmode - name: info-get diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 010a7f216388..0c2e58e75022 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -369,10 +369,13 @@ static const struct
nla_policy devlink_port_param_get_dump_nl_policy[DEVLINK_ATT }; /* DEVLINK_CMD_PORT_PARAM_SET - do */ -static const struct nla_policy devlink_port_param_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_param_set_nl_policy[DEVLINK_ATTR_PARAM_VALUE_CMODE + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PARAM_TYPE] = NLA_POLICY_VALIDATE_FN(NLA_U8, &devlink_attr_param_type_validate), + [DEVLINK_ATTR_PARAM_VALUE_CMODE] = NLA_POLICY_MAX(NLA_U8, 2), }; /* DEVLINK_CMD_INFO_GET - do */ @@ -999,7 +1002,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_param_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_param_set_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_PARAM_VALUE_CMODE, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { diff --git a/net/devlink/param.c b/net/devlink/param.c index 9be343a0ffd3..5f9cd492e40c 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -170,12 +170,15 @@ static int devlink_param_get(struct devlink *devlink, } static int devlink_param_set(struct devlink *devlink, + struct devlink_port *devlink_port, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx, struct netlink_ext_ack *extack) { if (!param->set) return -EOPNOTSUPP; + + ctx->devlink_port = devlink_port; return param->set(devlink, param->id, ctx, extack); } @@ -514,8 +517,8 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, struct genl_info *info, enum devlink_command cmd) { + struct devlink_param_gset_ctx ctx = {}; enum devlink_param_type param_type; - struct devlink_param_gset_ctx ctx; enum devlink_param_cmode cmode; struct devlink_param_item *param_item; const struct devlink_param *param; @@ -554,7 +557,8 @@ static int 
__devlink_nl_cmd_param_set_doit(struct devlink *devlink, return -EOPNOTSUPP; ctx.val = value; ctx.cmode = cmode; - err = devlink_param_set(devlink, param, &ctx, info->extack); + err = devlink_param_set(devlink, devlink_port, param, + &ctx, info->extack); if (err) return err; } @@ -640,8 +644,12 @@ int devlink_nl_port_param_get_doit(struct sk_buff *skb, int devlink_nl_port_param_set_doit(struct sk_buff *skb, struct genl_info *info) { - NL_SET_ERR_MSG(info->extack, "Port params are not supported"); - return -EINVAL; + struct devlink_port *devlink_port = info->user_ptr[1]; + struct devlink *devlink = info->user_ptr[0]; + + return __devlink_nl_cmd_param_set_doit(devlink, devlink_port, + &devlink_port->params, info, + DEVLINK_CMD_PORT_PARAM_NEW); } static int devlink_param_verify(const struct devlink_param *param) -- 2.50.0 From: Saeed Mahameed Devices that support this in permanent mode will be requested to keep the port link up even when driver is not loaded, netdev carrier state won't affect the physical port link state. This is useful for when the link is needed to access onboard management such as BMC, even if the host driver isn't loaded. Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- Documentation/networking/devlink/devlink-params.rst | 4 ++++ include/net/devlink.h | 4 ++++ net/devlink/param.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst index f2920371622c..0e9c0e17573d 100644 --- a/Documentation/networking/devlink/devlink-params.rst +++ b/Documentation/networking/devlink/devlink-params.rst @@ -143,3 +143,7 @@ own name. * - ``total_vfs`` - u32 - The total number of Virtual Functions (VFs) supported by the PF. + * - ``keep_link_up`` + - Boolean + - When enabled, the device will keep the port link up even if the driver is + not loaded. 
diff --git a/include/net/devlink.h b/include/net/devlink.h index b2517813ce17..13331194e143 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -526,6 +526,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC, DEVLINK_PARAM_GENERIC_ID_TOTAL_VFS, + DEVLINK_PARAM_GENERIC_ID_KEEP_LINK_UP, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -590,6 +591,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_TOTAL_VFS_NAME "total_vfs" #define DEVLINK_PARAM_GENERIC_TOTAL_VFS_TYPE DEVLINK_PARAM_TYPE_U32 +#define DEVLINK_PARAM_GENERIC_KEEP_LINK_UP_NAME "keep_link_up" +#define DEVLINK_PARAM_GENERIC_KEEP_LINK_UP_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/net/devlink/param.c b/net/devlink/param.c index 5f9cd492e40c..2a222d1bf81c 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -102,6 +102,10 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_TOTAL_VFS_NAME, .type = DEVLINK_PARAM_GENERIC_TOTAL_VFS_TYPE, }, + { .id = DEVLINK_PARAM_GENERIC_ID_KEEP_LINK_UP, + .name = DEVLINK_PARAM_GENERIC_KEEP_LINK_UP_NAME, + .type = DEVLINK_PARAM_GENERIC_KEEP_LINK_UP_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) -- 2.50.0 From: Saeed Mahameed When set, the NIC keeps the link up as long as the host is not in standby mode, even when the driver is not loaded. When enabled, netdev carrier state won't affect the physical port link state. This is useful for when the link is needed to access onboard management such as BMC, even if the host driver isn't loaded. 
Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- Documentation/networking/devlink/mlx5.rst | 5 + .../ethernet/mellanox/mlx5/core/en/devlink.c | 17 ++- .../ethernet/mellanox/mlx5/core/en/devlink.h | 3 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 4 +- .../mellanox/mlx5/core/lib/nv_param.c | 142 ++++++++++++++++++ .../mellanox/mlx5/core/lib/nv_param.h | 4 + 6 files changed, 169 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 00a43324dec2..c9c064de4699 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -45,6 +45,11 @@ Parameters - The range is between 1 and a device-specific max. - Applies to each physical function (PF) independently, if the device supports it. Otherwise, it applies symmetrically to all PFs. + * - ``keep_link_up`` + - permanent + - Boolean + - When set, the NIC keeps the link up as long as the host is not in standby + mode, even when the driver is not loaded. 
Note: permanent parameters such as ``enable_sriov`` and ``total_vfs`` require FW reset to take effect diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index 0b1ac6e5c890..eccb8511582f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -3,6 +3,7 @@ #include "en/devlink.h" #include "eswitch.h" +#include "lib/nv_param.h" static const struct devlink_ops mlx5e_devlink_ops = { }; @@ -54,6 +55,7 @@ int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev, struct devlink_port_attrs attrs = {}; struct netdev_phys_item_id ppid = {}; unsigned int dl_port_index; + int err; if (mlx5_core_is_pf(mdev)) { attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; @@ -72,11 +74,20 @@ int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev, devlink_port_attrs_set(&mlx5e_dev->dl_port, &attrs); - return devlink_port_register(devlink, &mlx5e_dev->dl_port, - dl_port_index); + err = devlink_port_register(devlink, &mlx5e_dev->dl_port, + dl_port_index); + if (err) + return err; + + err = mlx5_nv_port_param_register(mdev, &mlx5e_dev->dl_port); + if (err) + mlx5_core_warn(mdev, "Failed to register eth port params\n"); + return 0; } -void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev) +void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev, + struct mlx5_core_dev *mdev) { + mlx5_nv_port_param_unregister(mdev, &mlx5e_dev->dl_port); devlink_port_unregister(&mlx5e_dev->dl_port); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h index d5ec4461f300..049d82732f72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -12,6 +12,7 @@ struct mlx5e_dev *mlx5e_create_devlink(struct device *dev, void mlx5e_destroy_devlink(struct mlx5e_dev *mlx5e_dev); int mlx5e_devlink_port_register(struct mlx5e_dev 
*mlx5e_dev, struct mlx5_core_dev *mdev); -void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev); +void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev, + struct mlx5_core_dev *mdev); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e8e5b347f9b2..f444eaaeae3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6658,7 +6658,7 @@ static int _mlx5e_probe(struct auxiliary_device *adev) err_destroy_netdev: mlx5e_destroy_netdev(priv); err_devlink_port_unregister: - mlx5e_devlink_port_unregister(mlx5e_dev); + mlx5e_devlink_port_unregister(mlx5e_dev, mdev); err_devlink_unregister: mlx5e_destroy_devlink(mlx5e_dev); return err; @@ -6712,7 +6712,7 @@ static void _mlx5e_remove(struct auxiliary_device *adev) if (priv->profile) priv->profile->cleanup(priv); mlx5e_destroy_netdev(priv); - mlx5e_devlink_port_unregister(mlx5e_dev); + mlx5e_devlink_port_unregister(mlx5e_dev, mdev); mlx5e_destroy_devlink(mlx5e_dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c index 383d8cfe4c0a..a7578eac2dd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c @@ -3,12 +3,15 @@ #include "nv_param.h" #include "mlx5_core.h" +#include "en.h" enum { MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CONF = 0x80, MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CAP = 0x81, MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CONFIG = 0x10a, + MLX5_CLASS_1_CTRL_ID_NV_KEEP_LINK_UP = 0x190, + MLX5_CLASS_3_CTRL_ID_NV_PF_PCI_CONF = 0x80, }; @@ -17,6 +20,12 @@ struct mlx5_ifc_configuration_item_type_class_global_bits { u8 parameter_index[0x18]; }; +struct mlx5_ifc_configuration_item_type_class_physical_port_bits { + u8 type_class[0x8]; + u8 port[0x8]; + u8 parameter_index[0x10]; +}; + struct 
mlx5_ifc_configuration_item_type_class_per_host_pf_bits { u8 type_class[0x8]; u8 pf_index[0x6]; @@ -29,6 +38,9 @@ union mlx5_ifc_config_item_type_auto_bits { configuration_item_type_class_global; struct mlx5_ifc_configuration_item_type_class_per_host_pf_bits configuration_item_type_class_per_host_pf; + struct mlx5_ifc_configuration_item_type_class_physical_port_bits + configuration_item_type_class_physical_port; + u8 reserved_at_0[0x20]; }; @@ -123,6 +135,16 @@ struct mlx5_ifc_nv_sw_offload_conf_bits { u8 lro_log_timeout0[0x4]; }; +struct mlx5_ifc_nv_keep_link_up_bits { + u8 reserved_at_0[0x1a]; + u8 auto_power_save_link_down[0x1]; + u8 do_not_clear_port_stats[0x1]; + u8 keep_link_up_on_standby[0x1]; + u8 keep_link_up_on_boot[0x1]; + u8 keep_ib_link_up[0x1]; + u8 keep_eth_link_up[0x1]; +}; + #define MNVDA_HDR_SZ \ (MLX5_ST_SZ_BYTES(mnvda_reg) - \ MLX5_BYTE_OFF(mnvda_reg, configuration_item_data)) @@ -574,3 +596,123 @@ void mlx5_nv_param_unregister_dl_params(struct devlink *devlink) ARRAY_SIZE(mlx5_nv_param_devlink_params)); } +/* mlx5e devlink port params */ + +static int mlx5_nv_param_read_keep_link_up(struct mlx5_core_dev *dev, + void *mnvda, size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(physical_port, mnvda, type_class, 1); + MLX5_SET_CFG_ITEM_TYPE(physical_port, mnvda, parameter_index, + MLX5_CLASS_1_CTRL_ID_NV_KEEP_LINK_UP); + MLX5_SET_CFG_ITEM_TYPE(physical_port, mnvda, port, + mlx5_get_dev_index(dev) + 1); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_keep_link_up); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static int +mlx5_nv_port_param_keep_link_up_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5e_dev *edev = devlink_priv(devlink); + struct mlx5_core_dev *dev = edev->priv->mdev; + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + void *data; + int err; + + err = mlx5_nv_param_read_keep_link_up(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + if 
(MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + ctx->val.vbool = + !!MLX5_GET(nv_keep_link_up, data, keep_eth_link_up); + else if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) + ctx->val.vbool = + !!MLX5_GET(nv_keep_link_up, data, keep_ib_link_up); + else + ctx->val.vbool = false; + + return 0; +} + +static int +mlx5_nv_port_param_keep_link_up_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5e_dev *edev = devlink_priv(devlink); + struct mlx5_core_dev *dev = edev->priv->mdev; + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + void *data; + int err; + + err = mlx5_nv_param_read_keep_link_up(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + MLX5_SET(nv_keep_link_up, data, + keep_eth_link_up, ctx->val.vbool); + else if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) + MLX5_SET(nv_keep_link_up, data, keep_ib_link_up, + ctx->val.vbool); + else + return -EOPNOTSUPP; + + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); +} + +static int +mlx5_nv_port_param_keep_link_up_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5e_dev *edev = devlink_priv(devlink); + struct mlx5_core_dev *dev = edev->priv->mdev; + + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH && + MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_IB) { + NL_SET_ERR_MSG_MOD(extack, + "Not supported on this device link type"); + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct devlink_param mlx5_nv_param_devlink_port_params[] = { + DEVLINK_PARAM_GENERIC(KEEP_LINK_UP, BIT(DEVLINK_PARAM_CMODE_PERMANENT), + mlx5_nv_port_param_keep_link_up_get, + mlx5_nv_port_param_keep_link_up_set, + mlx5_nv_port_param_keep_link_up_validate), +}; + +int mlx5_nv_port_param_register(struct 
mlx5_core_dev *dev, + struct devlink_port *port) +{ + size_t params_count; + + if (!mlx5_core_is_pf(dev)) + return 0; + params_count = ARRAY_SIZE(mlx5_nv_param_devlink_port_params); + return devlink_port_params_register(port, + mlx5_nv_param_devlink_port_params, + params_count); +} + +void mlx5_nv_port_param_unregister(struct mlx5_core_dev *dev, + struct devlink_port *port) +{ + size_t params_count; + + if (!mlx5_core_is_pf(dev)) + return; + params_count = ARRAY_SIZE(mlx5_nv_param_devlink_port_params); + devlink_port_params_unregister(port, mlx5_nv_param_devlink_port_params, + params_count); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h index 9f4922ff7745..7ed99506c94f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h @@ -9,6 +9,10 @@ int mlx5_nv_param_register_dl_params(struct devlink *devlink); void mlx5_nv_param_unregister_dl_params(struct devlink *devlink); +int mlx5_nv_port_param_register(struct mlx5_core_dev *dev, + struct devlink_port *port); +void mlx5_nv_port_param_unregister(struct mlx5_core_dev *dev, + struct devlink_port *port); #endif -- 2.50.0 From: Saeed Mahameed Centralize devlink param value data validation in one function and fill corresponding extack error messages on validation error. 
Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- net/devlink/param.c | 84 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 16 deletions(-) diff --git a/net/devlink/param.c b/net/devlink/param.c index 2a222d1bf81c..7b6affea459e 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -422,45 +422,97 @@ devlink_param_type_get_from_info(struct genl_info *info, return 0; } +static int +devlink_param_value_validate(struct genl_info *info, + enum devlink_param_type type) +{ + struct netlink_ext_ack *extack = info->extack; + struct nlattr *param_data; + int len = 0; + + if (type != DEVLINK_PARAM_TYPE_BOOL && + GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_DATA)) + return -EINVAL; + + param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]; + + /* bool is the only type that doesn't expect data*/ + if (type == DEVLINK_PARAM_TYPE_BOOL) { + if (param_data && nla_len(param_data)) { + NL_SET_ERR_MSG_MOD(extack, + "Boolean parameter should not have data"); + return -EINVAL; + } + return 0; + } + + if (!param_data) { + NL_SET_ERR_MSG_MOD(extack, "Expected data, got none"); + return -EINVAL; + } + + len = nla_len(param_data); + switch (type) { + case DEVLINK_PARAM_TYPE_U8: + if (len == sizeof(u8)) + return 0; + NL_SET_ERR_MSG_FMT_MOD(extack, + "Expected uint8, got %d bytes", len); + break; + case DEVLINK_PARAM_TYPE_U16: + if (len == sizeof(u16)) + return 0; + NL_SET_ERR_MSG_FMT_MOD(extack, + "Expected uint16, got %d bytes", len); + break; + case DEVLINK_PARAM_TYPE_U32: + if (len == sizeof(u32)) + return 0; + NL_SET_ERR_MSG_FMT_MOD(extack, + "Expected uint32, got %d bytes", len); + break; + case DEVLINK_PARAM_TYPE_STRING: + len = strnlen(nla_data(param_data), nla_len(param_data)); + + if (len < nla_len(param_data) && + len < __DEVLINK_PARAM_MAX_STRING_VALUE) + return 0; + NL_SET_ERR_MSG_MOD(extack, "String too long"); + break; + default: + NL_SET_ERR_MSG_FMT_MOD(extack, + "Not supported value type %d", type); + break; + } + return 
-EINVAL; +} + static int devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { struct nlattr *param_data; - int len; - param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]; - - if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data) + if (devlink_param_value_validate(info, param->type)) return -EINVAL; + param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]; + switch (param->type) { case DEVLINK_PARAM_TYPE_U8: - if (nla_len(param_data) != sizeof(u8)) - return -EINVAL; value->vu8 = nla_get_u8(param_data); break; case DEVLINK_PARAM_TYPE_U16: - if (nla_len(param_data) != sizeof(u16)) - return -EINVAL; value->vu16 = nla_get_u16(param_data); break; case DEVLINK_PARAM_TYPE_U32: - if (nla_len(param_data) != sizeof(u32)) - return -EINVAL; value->vu32 = nla_get_u32(param_data); break; case DEVLINK_PARAM_TYPE_STRING: - len = strnlen(nla_data(param_data), nla_len(param_data)); - if (len == nla_len(param_data) || - len >= __DEVLINK_PARAM_MAX_STRING_VALUE) - return -EINVAL; strcpy(value->vstr, nla_data(param_data)); break; case DEVLINK_PARAM_TYPE_BOOL: - if (param_data && nla_len(param_data)) - return -EINVAL; value->vbool = nla_get_flag(param_data); break; } -- 2.50.0 From: Saeed Mahameed The devlink param value attribute is not defined, since devlink handles value validation and parsing internally; this allows us to implement multi-attribute values without breaking any policies. Devlink param multi-attribute values are considered to be dynamically sized arrays of u32 values. By introducing a new devlink param type DEVLINK_PARAM_TYPE_ARR_U32, drivers and user space can set a variable count of u32 values into the DEVLINK_ATTR_PARAM_VALUE_DATA attribute. Implement get/set parsing and add to the internal value structure passed to drivers. This is useful for devices that need to configure a list of values for a specific configuration. example: $ devlink dev param show pci/...
name multi-value-param name multi-value-param type driver-specific values: cmode permanent value: 0,1,2,3,4,5,6,7 $ devlink dev param set pci/... name multi-value-param \ value 4,5,6,7,0,1,2,3 cmode permanent Signed-off-by: Saeed Mahameed Reviewed-by: Jiri Pirko --- Documentation/netlink/specs/devlink.yaml | 3 ++ include/net/devlink.h | 7 ++++ include/uapi/linux/devlink.h | 1 + net/devlink/netlink_gen.c | 2 ++ net/devlink/param.c | 42 +++++++++++++++++++++++- 5 files changed, 54 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 606070ae75f5..441fd0aa34f3 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -224,6 +224,9 @@ definitions: value: 10 - name: binary + - + name: u32-array + value: 129 - name: rate-tc-index-max type: const diff --git a/include/net/devlink.h b/include/net/devlink.h index 13331194e143..93bd5cc7911b 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -422,12 +422,15 @@ typedef u64 devlink_resource_occ_get_t(void *priv); #define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports" #define __DEVLINK_PARAM_MAX_STRING_VALUE 32 +#define __DEVLINK_PARAM_MAX_ARRAY_SIZE 32 + enum devlink_param_type { DEVLINK_PARAM_TYPE_U8 = DEVLINK_VAR_ATTR_TYPE_U8, DEVLINK_PARAM_TYPE_U16 = DEVLINK_VAR_ATTR_TYPE_U16, DEVLINK_PARAM_TYPE_U32 = DEVLINK_VAR_ATTR_TYPE_U32, DEVLINK_PARAM_TYPE_STRING = DEVLINK_VAR_ATTR_TYPE_STRING, DEVLINK_PARAM_TYPE_BOOL = DEVLINK_VAR_ATTR_TYPE_FLAG, + DEVLINK_PARAM_TYPE_ARR_U32 = DEVLINK_VAR_ATTR_TYPE_U32_ARRAY, }; union devlink_param_value { @@ -436,6 +439,10 @@ union devlink_param_value { u32 vu32; char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE]; bool vbool; + struct { + u32 size; + u32 vu32[__DEVLINK_PARAM_MAX_ARRAY_SIZE]; + } arr; }; struct devlink_param_gset_ctx { diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index e72bcc239afd..7597c2481476 100644 --- 
a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -403,6 +403,7 @@ enum devlink_var_attr_type { DEVLINK_VAR_ATTR_TYPE_BINARY, __DEVLINK_VAR_ATTR_TYPE_CUSTOM_BASE = 0x80, /* Any possible custom types, unrelated to NLA_* values go below */ + DEVLINK_VAR_ATTR_TYPE_U32_ARRAY, }; enum devlink_attr { diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 0c2e58e75022..7201caa7a7ed 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -31,6 +31,8 @@ devlink_attr_param_type_validate(const struct nlattr *attr, case DEVLINK_VAR_ATTR_TYPE_NUL_STRING: fallthrough; case DEVLINK_VAR_ATTR_TYPE_BINARY: + fallthrough; + case DEVLINK_VAR_ATTR_TYPE_U32_ARRAY: return 0; } NL_SET_ERR_MSG_ATTR(extack, attr, "invalid enum value"); diff --git a/net/devlink/param.c b/net/devlink/param.c index 7b6affea459e..e30d5b54d364 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -225,6 +225,13 @@ devlink_nl_param_value_fill_one(struct sk_buff *msg, nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA)) goto value_nest_cancel; break; + case DEVLINK_PARAM_TYPE_ARR_U32: + for (int i = 0; i < val.arr.size; i++) { + if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, + val.arr.vu32[i])) + goto value_nest_cancel; + } + break; } nla_nest_end(msg, param_value_attr); @@ -428,7 +435,7 @@ devlink_param_value_validate(struct genl_info *info, { struct netlink_ext_ack *extack = info->extack; struct nlattr *param_data; - int len = 0; + int len = 0, rem; if (type != DEVLINK_PARAM_TYPE_BOOL && GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_DATA)) @@ -479,6 +486,28 @@ devlink_param_value_validate(struct genl_info *info, return 0; NL_SET_ERR_MSG_MOD(extack, "String too long"); break; + case DEVLINK_PARAM_TYPE_ARR_U32: + len = 0; + nla_for_each_attr_type(param_data, + DEVLINK_ATTR_PARAM_VALUE_DATA, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + if (nla_len(param_data) != sizeof(u32)) { + NL_SET_ERR_MSG_MOD(extack, + "Array element 
size must be 4 bytes"); + return -EINVAL; + } + if (++len > __DEVLINK_PARAM_MAX_ARRAY_SIZE) { + NL_SET_ERR_MSG_MOD(extack, + "Array size exceeds maximum"); + return -EINVAL; + } + } + if (len) + return 0; + NL_SET_ERR_MSG_MOD(extack, + "Value array must have at least one entry"); + break; default: NL_SET_ERR_MSG_FMT_MOD(extack, "Not supported value type %d", type); @@ -493,6 +522,7 @@ devlink_param_value_get_from_info(const struct devlink_param *param, union devlink_param_value *value) { struct nlattr *param_data; + int rem, i = 0; if (devlink_param_value_validate(info, param->type)) return -EINVAL; @@ -515,6 +545,16 @@ devlink_param_value_get_from_info(const struct devlink_param *param, case DEVLINK_PARAM_TYPE_BOOL: value->vbool = nla_get_flag(param_data); break; + case DEVLINK_PARAM_TYPE_ARR_U32: { + nla_for_each_attr_type(param_data, + DEVLINK_ATTR_PARAM_VALUE_DATA, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) + value->arr.vu32[i++] = nla_get_u32(param_data); + + value->arr.size = i; + break; + } } return 0; } -- 2.50.0 From: Saeed Mahameed E-Switch hairpin per prio buffers are controlled and configurable by the device; add two devlink params to control them. esw_hairpin_per_prio_log_queue_size: p0,p1,...,p7 Log(base 2) of the number of packet descriptors allocated internally for hairpin for IEEE802.1p priorities. 0 means that no descriptors are allocated for this priority and traffic with this priority will be dropped. esw_hairpin_per_prio_log_buf_size: p0,p1,...,p7 Log(base 2) of the buffer size (in bytes) allocated internally for hairpin for IEEE802.1p priorities. 0 means no buffer for this priority and traffic with this priority will be dropped.
Signed-off-by: Saeed Mahameed Reviewed-by: Jiri Pirko --- Documentation/networking/devlink/mlx5.rst | 15 + .../net/ethernet/mellanox/mlx5/core/devlink.h | 4 +- .../mellanox/mlx5/core/lib/nv_param.c | 283 ++++++++++++++++++ 3 files changed, 301 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index c9c064de4699..053060de6126 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -161,6 +161,21 @@ parameters. * ``balanced`` : Merges fewer CQEs, resulting in a moderate compression ratio but maintaining a balance between bandwidth savings and performance * ``aggressive`` : Merges more CQEs into a single entry, achieving a higher compression rate and maximizing performance, particularly under high traffic loads + * - ``esw_hairpin_per_prio_log_queue_size`` + - u32 array[8] + - permanent + - each item is log(base 2) of the number of packet descriptors allocated + internally for hairpin for IEEE802.1p priorities. + 0 means that no descriptors are allocated for this priority + and traffic with this priority will be dropped. + + * - ``esw_hairpin_per_prio_log_buf_size`` + - u32 array[8] + - permanent + - each item is log(base 2) of the buffer size (in bytes) allocated internally + for hairpin for IEEE802.1p priorities. + 0 means no buffer for this priority and traffic with this priority will be dropped. 
+ The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD`` Info versions diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 74bcdfa70361..b2c10ce1eac5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -22,7 +22,9 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, - MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE + MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE, + MLX5_DEVLINK_PARAM_ID_ESW_HAIRPIN_DESCRIPTORS, + MLX5_DEVLINK_PARAM_ID_ESW_HAIRPIN_DATA_SIZE, }; struct mlx5_trap_ctx { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c index a7578eac2dd0..d1115767dea8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ +#include + #include "nv_param.h" #include "mlx5_core.h" #include "en.h" enum { + MLX5_CLASS_0_CTRL_ID_NV_INTERNAL_HAIRPIN_CONF = 0x13, + MLX5_CLASS_0_CTRL_ID_NV_INTERNAL_HAIRPIN_CAP = 0x14, MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CONF = 0x80, MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CAP = 0x81, MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CONFIG = 0x10a, @@ -145,6 +149,19 @@ struct mlx5_ifc_nv_keep_link_up_bits { u8 keep_eth_link_up[0x1]; }; +struct mlx5_ifc_nv_internal_hairpin_cap_bits { + u8 log_max_hpin_total_num_descriptors[0x8]; + u8 log_max_hpin_total_data_size[0x8]; + u8 log_max_hpin_num_descriptor_per_prio[0x8]; + u8 log_max_hpin_data_size_per_prio[0x8]; +}; + +struct mlx5_ifc_nv_internal_hairpin_conf_bits { + u8 log_hpin_num_descriptor[8][0x8]; + + u8 log_hpin_data_size[8][0x8]; +}; + #define MNVDA_HDR_SZ \ (MLX5_ST_SZ_BYTES(mnvda_reg) - \ MLX5_BYTE_OFF(mnvda_reg, configuration_item_data)) @@ -562,6 +579,258 @@ static int mlx5_devlink_total_vfs_validate(struct devlink *devlink, u32 id, return 0; } +static int +mlx5_nv_param_read_internal_hairpin_conf(struct mlx5_core_dev *dev, + void *mnvda, size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0); + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index, + MLX5_CLASS_0_CTRL_ID_NV_INTERNAL_HAIRPIN_CONF); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_internal_hairpin_conf); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static int +mlx5_nv_param_read_internal_hairpin_cap(struct mlx5_core_dev *dev, + void *mnvda, size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0); + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index, + MLX5_CLASS_0_CTRL_ID_NV_INTERNAL_HAIRPIN_CAP); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static int +mlx5_nv_param_esw_hairpin_descriptors_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) + +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + void *data; + int err, i; + + 
BUILD_BUG_ON(IEEE_8021QAZ_MAX_TCS > __DEVLINK_PARAM_MAX_ARRAY_SIZE); + + err = mlx5_nv_param_read_internal_hairpin_conf(dev, mnvda, + sizeof(mnvda)); + if (err) + return err; + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + + ctx->val.arr.size = IEEE_8021QAZ_MAX_TCS; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + ctx->val.arr.vu32[i] = MLX5_GET(nv_internal_hairpin_conf, data, + log_hpin_num_descriptor[i]); + return 0; +} + +static int +mlx5_nv_param_esw_hairpin_descriptors_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + void *data; + int err, i; + + err = mlx5_nv_param_read_internal_hairpin_conf(dev, mnvda, + sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to query internal hairpin conf"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + MLX5_SET(nv_internal_hairpin_conf, data, + log_hpin_num_descriptor[i], ctx->val.arr.vu32[i]); + + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); +} + +static int +mlx5_nv_param_esw_hairpin_descriptors_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + u8 log_max_num_descriptors, log_max_total_descriptors; + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + u16 total = 0; + void *data; + int err, i; + + if (val.arr.size != IEEE_8021QAZ_MAX_TCS) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Array size must be %d", + IEEE_8021QAZ_MAX_TCS); + return -EINVAL; + } + err = mlx5_nv_param_read_internal_hairpin_cap(devlink_priv(devlink), + mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to query internal hairpin cap"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + log_max_total_descriptors = + MLX5_GET(nv_internal_hairpin_cap, data, + 
log_max_hpin_total_num_descriptors); + log_max_num_descriptors = + MLX5_GET(nv_internal_hairpin_cap, data, + log_max_hpin_num_descriptor_per_prio); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (val.arr.vu32[i] <= log_max_num_descriptors) + continue; + + NL_SET_ERR_MSG_FMT_MOD(extack, + "Max allowed value per prio is %d", + log_max_num_descriptors); + return -ERANGE; + } + + /* Validate total number of descriptors */ + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_internal_hairpin_conf(devlink_priv(devlink), + mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to query internal hairpin conf"); + return err; + } + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + total += 1 << val.arr.vu32[i]; + + if (total > (1 << log_max_total_descriptors)) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Log max total value allowed is %d", + log_max_total_descriptors); + return -ERANGE; + } + + return 0; +} + +static int +mlx5_nv_param_esw_hairpin_data_size_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + void *data; + int err, i; + + err = mlx5_nv_param_read_internal_hairpin_conf(dev, mnvda, + sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + ctx->val.arr.size = IEEE_8021QAZ_MAX_TCS; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + ctx->val.arr.vu32[i] = MLX5_GET(nv_internal_hairpin_conf, data, + log_hpin_data_size[i]); + return 0; +} + +static int +mlx5_nv_param_esw_hairpin_data_size_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + int err, i; + void *data; + + err = mlx5_nv_param_read_internal_hairpin_conf(dev, mnvda, + sizeof(mnvda)); + 
if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + MLX5_SET(nv_internal_hairpin_conf, data, log_hpin_data_size[i], + ctx->val.arr.vu32[i]); + + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); +} + +static int +mlx5_nv_param_esw_hairpin_data_size_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + u8 log_max_data_size, log_max_total_data_size; + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + unsigned long total = 0; + void *data; + int err, i; + + if (val.arr.size != IEEE_8021QAZ_MAX_TCS) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Array size must be %d", + IEEE_8021QAZ_MAX_TCS); + return -EINVAL; + } + + err = mlx5_nv_param_read_internal_hairpin_cap(devlink_priv(devlink), + mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to query internal hairpin cap"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + log_max_data_size = MLX5_GET(nv_internal_hairpin_cap, data, + log_max_hpin_data_size_per_prio); + log_max_total_data_size = MLX5_GET(nv_internal_hairpin_cap, data, + log_max_hpin_total_data_size); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (val.arr.vu32[i] <= log_max_data_size) + continue; + + NL_SET_ERR_MSG_FMT_MOD(extack, + "Max allowed value per prio is %d", + log_max_data_size); + return -ERANGE; + } + + /* Validate total data size */ + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_internal_hairpin_conf(devlink_priv(devlink), + mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to query internal hairpin conf"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + total += 1 << val.arr.vu32[i]; + + if (total > (1 << log_max_total_data_size)) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Log max total value allowed is %d", + 
log_max_total_data_size); + return -ERANGE; + } + + return 0; +} + static const struct devlink_param mlx5_nv_param_devlink_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT), mlx5_devlink_enable_sriov_get, @@ -576,6 +845,20 @@ static const struct devlink_param mlx5_nv_param_devlink_params[] = { mlx5_nv_param_devlink_cqe_compress_get, mlx5_nv_param_devlink_cqe_compress_set, mlx5_nv_param_devlink_cqe_compress_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_HAIRPIN_DESCRIPTORS, + "esw_hairpin_per_prio_log_queue_size", + DEVLINK_PARAM_TYPE_ARR_U32, + BIT(DEVLINK_PARAM_CMODE_PERMANENT), + mlx5_nv_param_esw_hairpin_descriptors_get, + mlx5_nv_param_esw_hairpin_descriptors_set, + mlx5_nv_param_esw_hairpin_descriptors_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_HAIRPIN_DATA_SIZE, + "esw_hairpin_per_prio_log_buf_size", + DEVLINK_PARAM_TYPE_ARR_U32, + BIT(DEVLINK_PARAM_CMODE_PERMANENT), + mlx5_nv_param_esw_hairpin_data_size_get, + mlx5_nv_param_esw_hairpin_data_size_set, + mlx5_nv_param_esw_hairpin_data_size_validate), }; int mlx5_nv_param_register_dl_params(struct devlink *devlink) -- 2.50.0