From: Cosmin Ratiu Up to now, rate groups could only contain vports from the same E-Switch. This patch relaxes that restriction if the device supports it (HCA_CAP.esw_cross_esw_sched == true) and the right conditions are met: - Link Aggregation (LAG) is enabled. - The E-Switches are from the same shared devlink device. This patch does not yet enable cross-esw scheduling, it's just the last preparatory patch. Signed-off-by: Cosmin Ratiu Reviewed-by: Carolina Jubran Signed-off-by: Tariq Toukan --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 114 +++++++++++++----- 1 file changed, 81 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index f86d7c50db42..3c8716b0644b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -50,7 +50,9 @@ struct mlx5_esw_sched_node { enum sched_node_type type; /* The eswitch this node belongs to. */ struct mlx5_eswitch *esw; - /* The children nodes of this node, empty list for leaf nodes. */ + /* The children nodes of this node, empty list for leaf nodes. + * Can be from multiple E-Switches. + */ struct list_head children; /* Valid only if this node is associated with a vport. */ struct mlx5_vport *vport; @@ -419,6 +421,7 @@ esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, struct mlx5_esw_sched_node *parent = vport_node->parent; u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = vport_node->esw->dev; + struct mlx5_vport *vport = vport_node->vport; void *attr; if (!mlx5_qos_element_type_supported( @@ -430,11 +433,18 @@ esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); - MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport); + MLX5_SET(vport_element, attr, vport_number, vport->vport); MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent ? parent->ix : vport_node->esw->qos.root_tsar_ix); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, vport_node->max_rate); + if (vport->dev != dev) { + /* The port is assigned to a node on another eswitch. */ + MLX5_SET(vport_element, attr, eswitch_owner_vhca_id_valid, + true); + MLX5_SET(vport_element, attr, eswitch_owner_vhca_id, + MLX5_CAP_GEN(vport->dev, vhca_id)); + } return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack); } @@ -446,6 +456,7 @@ esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node, { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = vport_tc_node->esw->dev; + struct mlx5_vport *vport = vport_tc_node->vport; void *attr; if (!mlx5_qos_element_type_supported( @@ -457,8 +468,7 @@ esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node, MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC); attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); - MLX5_SET(vport_tc_element, attr, vport_number, - vport_tc_node->vport->vport); + MLX5_SET(vport_tc_element, attr, vport_number, vport->vport); MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc); MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id, rate_limit_elem_ix); @@ -466,6 +476,13 @@ esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node, vport_tc_node->parent->ix); MLX5_SET(scheduling_context, sched_ctx, bw_share, vport_tc_node->bw_share); + if (vport->dev != dev) { + /* The port is assigned to a node on another eswitch. */ + MLX5_SET(vport_tc_element, attr, eswitch_owner_vhca_id_valid, + true); + MLX5_SET(vport_tc_element, attr, eswitch_owner_vhca_id, + MLX5_CAP_GEN(vport->dev, vhca_id)); + } return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx, extack); @@ -1194,6 +1211,29 @@ static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type, return 0; } +static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw, + u32 *tc_bw) +{ + int i, num_tcs = esw_qos_num_tcs(esw->dev); + + for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) + if (tc_bw[i]) + return false; + + return true; +} + +static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport, + u32 *tc_bw) +{ + struct mlx5_esw_sched_node *node = vport->qos.sched_node; + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + esw = (node && node->parent) ? node->parent->esw : esw; + + return esw_qos_validate_unsupported_tc_bw(esw, tc_bw); +} + static int esw_qos_vport_update(struct mlx5_vport *vport, enum sched_node_type type, struct mlx5_esw_sched_node *parent, @@ -1227,6 +1267,12 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) { esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw, extack); + if (!esw_qos_validate_unsupported_tc_bw(parent->esw, + curr_tc_bw)) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported traffic classes on the new device"); + return -EOPNOTSUPP; + } } return err; @@ -1575,30 +1621,6 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char * return 0; } -static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw, - u32 *tc_bw) -{ - int i, num_tcs = esw_qos_num_tcs(esw->dev); - - for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) { - if (tc_bw[i]) - return false; - } - - return true; -} - -static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport, - u32 *tc_bw) -{ - struct mlx5_esw_sched_node *node = vport->qos.sched_node; - struct mlx5_eswitch *esw = vport->dev->priv.eswitch; - - esw = (node && node->parent) ? node->parent->esw : esw; - - return esw_qos_validate_unsupported_tc_bw(esw, tc_bw); -} - static bool esw_qos_tc_bw_disabled(u32 *tc_bw) { int i; @@ -1803,18 +1825,44 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, return 0; } +static int +mlx5_esw_validate_cross_esw_scheduling(struct mlx5_eswitch *esw, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + if (!parent || esw == parent->esw) + return 0; + + if (!MLX5_CAP_QOS(esw->dev, esw_cross_esw_sched)) { + NL_SET_ERR_MSG_MOD(extack, + "Cross E-Switch scheduling is not supported"); + return -EOPNOTSUPP; + } + if (esw->dev->shd != parent->esw->dev->shd) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot add vport to a parent belonging to a different device"); + return -EOPNOTSUPP; + } + if (!mlx5_lag_is_active(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Cross E-Switch scheduling requires LAG to be activated"); + return -EOPNOTSUPP; + } + + return 0; +} + static int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; - int err = 0; + int err; - if (parent && parent->esw != esw) { - NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported"); - return -EOPNOTSUPP; - } + err = mlx5_esw_validate_cross_esw_scheduling(esw, parent, extack); + if (err) + return err; if (!vport->qos.sched_node && parent) { enum sched_node_type type; -- 2.31.1