From: Shay Drory Enable TC flow steering for SD LAG mode by extending multiport eligibility checks and peer flow handling. SD LAG operates similarly to MPESW for TC offloads - flows on secondary devices need peer flow creation on the primary, and multiport forwarding rules are eligible when either MPESW or SD LAG is active. Add mlx5_lag_is_sd() helper to query SD LAG mode, and mlx5_sd_is_primary() to identify the primary device. Redirect uplink priv/proto_dev queries to the primary device's eswitch in SD configurations. Export mlx5_sd_get_primary(), which now returns NULL while the SD devcom component is not ready. Reject FDB flow offload with -EOPNOTSUPP while the device is in SD LAG mode but LAG is not yet active. Store the peer index in each peer flow at add time so that deletion no longer depends on the LAG state at del time. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan --- .../ethernet/mellanox/mlx5/core/en/tc_priv.h | 4 ++ .../net/ethernet/mellanox/mlx5/core/en_tc.c | 53 +++++++++++++++++-- .../mellanox/mlx5/core/eswitch_offloads.c | 8 +++ .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 14 +++++ .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 1 + .../net/ethernet/mellanox/mlx5/core/lib/sd.c | 15 +++++- .../net/ethernet/mellanox/mlx5/core/lib/sd.h | 2 + 7 files changed, 92 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index a0434ceebe69..28cab4bf525c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -104,6 +104,10 @@ struct mlx5e_tc_flow { * due to missing route) */ struct list_head peer_flows; /* flows on peer */ + int peer_index; /* peer-flow index pinned at add time, used at del + * time so removal is independent of LAG state + * changes between add and del. 
+ */ struct net_device *orig_dev; /* netdev adding flow first */ int tmp_entry_index; struct list_head tmp_list; /* temporary flow list used by neigh update */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2a16368a948e..910492eb51f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -71,6 +71,7 @@ #include #include "lag/lag.h" #include "lag/mp.h" +#include "lib/sd.h" #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) @@ -2132,7 +2133,7 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, mutex_unlock(&esw->offloads.peer_mutex); list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { - if (peer_index != mlx5_lag_get_dev_seq(peer_flow->priv->mdev)) + if (peer_index != peer_flow->peer_index) continue; list_del(&peer_flow->peer_flows); @@ -4196,9 +4197,26 @@ static bool is_lag_dev(struct mlx5e_priv *priv, same_hw_reps(priv, peer_netdev)); } +static bool is_sd_eligible(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + struct mlx5e_priv *peer_priv; + + peer_priv = netdev_priv(peer_netdev); + return same_hw_reps(priv, peer_netdev) && + mlx5_lag_is_sd(priv->mdev) && + (mlx5_sd_get_primary(priv->mdev) == + mlx5_sd_get_primary(peer_priv->mdev)); +} + static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) { - return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev); + struct mlx5_core_dev *primary = mlx5_sd_get_primary(priv->mdev); + + if (!primary) + return false; + + return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(primary); } bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, @@ -4207,6 +4225,9 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, if (is_merged_eswitch_vfs(priv, out_dev)) return true; + if (is_sd_eligible(priv, out_dev)) + return true; + if (is_multiport_eligible(priv, 
out_dev)) return true; @@ -4351,7 +4372,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, return &tc->ht; } -static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) +static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow, bool *is_sd) { struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; struct mlx5_flow_attr *attr = flow->attr; @@ -4372,6 +4393,13 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) if (mlx5_lag_is_mpesw(esw_attr->in_mdev)) return true; + if (mlx5_lag_is_sd(esw_attr->in_mdev) && + !mlx5_sd_is_primary(esw_attr->in_mdev)) { + if (!mlx5_lag_is_mpesw(mlx5_sd_get_primary(esw_attr->in_mdev))) + *is_sd = true; + return true; + } + return false; } @@ -4609,6 +4637,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, goto out; } + peer_flow->peer_index = i; list_add_tail(&peer_flow->peer_flows, &flow->peer_flows); flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); @@ -4628,19 +4657,26 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow **__flow) { struct mlx5_devcom_comp_dev *devcom = priv->mdev->priv.eswitch->devcom, *pos; + struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *in_rep = rpriv->rep; struct mlx5_core_dev *in_mdev = priv->mdev; struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; + bool is_sd = false; int err; + if (mlx5_lag_is_sd(in_mdev) && !mlx5_lag_is_active(in_mdev)) { + NL_SET_ERR_MSG_MOD(extack, "SD shared FDB not yet active"); + return -EOPNOTSUPP; + } + flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, in_mdev); if (IS_ERR(flow)) return PTR_ERR(flow); - if (!is_peer_flow_needed(flow)) { + if (!is_peer_flow_needed(flow, &is_sd)) { *__flow = flow; return 0; } @@ -4651,6 +4687,15 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, } mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) { + if (is_sd) { + /* SD shared FDB: only the matching SD primary. 
*/ + if (mlx5_sd_get_primary(in_mdev) != + mlx5_sd_get_primary(peer_esw->dev)) + continue; + } else { + if (!mlx5_sd_is_primary(peer_esw->dev)) + continue; + } err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw); if (err) goto peer_clean; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d65f30bb2f80..830fc910a080 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4690,8 +4690,11 @@ EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps_nested); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { + struct mlx5_core_dev *primary = mlx5_sd_get_primary(esw->dev); struct mlx5_eswitch_rep *rep; + if (primary) + esw = primary->priv.eswitch; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); return rep->rep_data[rep_type].priv; } @@ -4713,6 +4716,11 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) { + struct mlx5_core_dev *primary = mlx5_sd_get_primary(esw->dev); + + if (primary) + esw = primary->priv.eswitch; + return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type); } EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index a2c7e2927431..dd3f18f85466 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -2425,6 +2425,20 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_sriov); +bool mlx5_lag_is_sd(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sd(ldev, dev); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} + bool 
mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index cbe201529661..e412bb85027c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -202,6 +202,7 @@ static inline bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) } #endif bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); +bool mlx5_lag_is_sd(struct mlx5_core_dev *dev); int mlx5_lag_demux_init(struct mlx5_core_dev *dev, struct mlx5_flow_table_attr *ft_attr); void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index ec606851feb8..25286ecd724e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -49,13 +49,16 @@ static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev) return sd->host_buses; } -static struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev) +struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); if (!sd) return dev; + if (!mlx5_devcom_comp_is_ready(sd->devcom)) + return NULL; + return sd->primary ? 
dev : sd->primary_dev; } @@ -69,6 +72,16 @@ struct mlx5_devcom_comp_dev *mlx5_sd_get_devcom(struct mlx5_core_dev *dev) return sd->devcom; } +bool mlx5_sd_is_primary(struct mlx5_core_dev *dev) +{ + struct mlx5_sd *sd = mlx5_get_sd(dev); + + if (!sd) + return true; + + return sd->primary; +} + struct mlx5_core_dev * mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h index bf59903ab23f..011702ff6f02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h @@ -10,6 +10,8 @@ struct mlx5_sd; +struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev); +bool mlx5_sd_is_primary(struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx); int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix); int mlx5_sd_ch_ix_get_vec_ix(struct mlx5_core_dev *dev, int ch_ix); -- 2.44.0