From: Carolina Jubran When creating IPsec flow tables with tunnel mode enabled, the driver uses mlx5_eswitch_block_encap() to prevent tunnel encapsulation conflicts across different domains (NIC_RX/NIC_TX and FDB), since the firmware doesn’t allow both at the same time. Currently, the driver attempts to reserve tunnel mode unconditionally for both NIC and FDB IPsec tables. This can lead to conflicting tunnel mode setups, for example, if a flow table was created in the FDB domain with tunnel offload enabled, and we later try to create another one in the NIC, or vice versa. To resolve this, adjust the blocking logic so that tunnel mode is only reserved by NIC flows. This ensures that tunnel offload is exclusively used in either the NIC or the FDB, and avoids unintended offload conflicts. Fixes: 1762f132d542 ("net/mlx5e: Support IPsec packet offload for RX in switchdev mode") Fixes: c6c2bf5db4ea ("net/mlx5e: Support IPsec packet offload for TX in switchdev mode") Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Reviewed-by: Leon Romanovsky Signed-off-by: Tariq Toukan --- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 8 ++++++-- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +++-- .../mellanox/mlx5/core/eswitch_offloads.c | 18 ++++++++++-------- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 6ccfc2af07b7..0bc080274584 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -1069,7 +1069,9 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, /* Create FT */ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL) - rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); + rx->allow_tunnel_mode = + mlx5_eswitch_block_encap(mdev, rx == ipsec->rx_esw); + if (rx->allow_tunnel_mode) flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; ft = ipsec_ft_create(attr.ns, attr.sa_level, attr.prio, 1, 2, flags); @@ -1310,7 +1312,9 @@ static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, goto err_status_rule; if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL) - tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); + tx->allow_tunnel_mode = + mlx5_eswitch_block_encap(mdev, tx == ipsec->tx_esw); + if (tx->allow_tunnel_mode) flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; ft = ipsec_ft_create(tx->ns, attr.sa_level, attr.prio, 1, 4, flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index df3756d7e52e..16eb99aba2a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -879,7 +879,7 @@ void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw); -bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev); +bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev, bool from_fdb); void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev); int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev); @@ -974,7 +974,8 @@ mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw) return 0; } -static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) +static inline bool +mlx5_eswitch_block_encap(struct mlx5_core_dev *dev, bool from_fdb) { return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 52c3de24bea3..4cf995be127d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4006,23 +4006,25 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } -bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev) +bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev, bool from_fdb) { struct mlx5_eswitch *esw = dev->priv.eswitch; + enum devlink_eswitch_encap_mode encap; + bool allow_tunnel = false; if (!mlx5_esw_allowed(esw)) return true; down_write(&esw->mode_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY && - esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { - up_write(&esw->mode_lock); - return false; + encap = esw->offloads.encap; + if (esw->mode == MLX5_ESWITCH_LEGACY || + (encap == DEVLINK_ESWITCH_ENCAP_MODE_NONE && !from_fdb)) { + allow_tunnel = true; + esw->offloads.num_block_encap++; } - - esw->offloads.num_block_encap++; up_write(&esw->mode_lock); - return true; + + return allow_tunnel; } void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev) -- 2.31.1 From: Carolina Jubran When configuring IPsec packet offload in tunnel mode, the driver tries to create tunnel reformat objects unconditionally. This is incorrect, because tunnel mode is only permitted under specific encapsulation settings, and that decision is already made when the flow table is created. The offending commit attempted to block this case in the state add path, but the check there happens too late and does not prevent the reformat from being configured. Fix by taking short reservations for both the eswitch mode and the encap at the start of state setup. This preserves the block ordering (mode --> encap) used later: the mode is blocked during RX/TX get, and the encap is blocked during flow-table creation. This lets us fail early if either reservation cannot be obtained, it means a mode transition is underway or a conflicting configuration already owns encap. If both succeed, the flow-table path later takes the ownership and the reservations are released on exit. Fixes: 146c196b60e4 ("net/mlx5e: Create IPsec table with tunnel support only when encap is disabled") Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Reviewed-by: Leon Romanovsky Signed-off-by: Tariq Toukan --- .../mellanox/mlx5/core/en_accel/ipsec.c | 38 +++++++++++++------ .../mellanox/mlx5/core/en_accel/ipsec.h | 2 +- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 24 +++++++----- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 00e77c71e201..0a4fb8c92268 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -772,6 +772,7 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, struct netlink_ext_ack *extack) { struct mlx5e_ipsec_sa_entry *sa_entry = NULL; + bool allow_tunnel_mode = false; struct mlx5e_ipsec *ipsec; struct mlx5e_priv *priv; gfp_t gfp; @@ -803,6 +804,20 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, goto err_xfrm; } + if (mlx5_eswitch_block_mode(priv->mdev)) + goto unblock_ipsec; + + if (x->props.mode == XFRM_MODE_TUNNEL && + x->xso.type == XFRM_DEV_OFFLOAD_PACKET) { + allow_tunnel_mode = mlx5e_ipsec_fs_tunnel_allowed(sa_entry); + if (!allow_tunnel_mode) { + NL_SET_ERR_MSG_MOD(extack, + "Packet offload tunnel mode is disabled due to encap settings"); + err = -EINVAL; + goto unblock_mode; + } + } + /* check esn */ if (x->props.flags & XFRM_STATE_ESN) mlx5e_ipsec_update_esn_state(sa_entry); @@ -817,7 +832,7 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, err = mlx5_ipsec_create_work(sa_entry); if (err) - goto unblock_ipsec; + goto unblock_encap; err = mlx5e_ipsec_create_dwork(sa_entry); if (err) @@ -832,14 +847,6 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, if (err) goto err_hw_ctx; - if (x->props.mode == XFRM_MODE_TUNNEL && - x->xso.type == XFRM_DEV_OFFLOAD_PACKET && - !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) { - NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings"); - err = -EINVAL; - goto err_add_rule; - } - /* We use *_bh() variant because xfrm_timer_handler(), which runs * in softirq context, can reach our state delete logic and we need * xa_erase_bh() there. @@ -855,8 +862,7 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork, MLX5_IPSEC_RESCHED); - if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && - x->props.mode == XFRM_MODE_TUNNEL) { + if (allow_tunnel_mode) { xa_lock_bh(&ipsec->sadb); __xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, MLX5E_IPSEC_TUNNEL_SA); @@ -865,6 +871,11 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, out: x->xso.offload_handle = (unsigned long)sa_entry; + if (allow_tunnel_mode) + mlx5_eswitch_unblock_encap(priv->mdev); + + mlx5_eswitch_unblock_mode(priv->mdev); + return 0; err_add_rule: @@ -877,6 +888,11 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, if (sa_entry->work) kfree(sa_entry->work->data); kfree(sa_entry->work); +unblock_encap: + if (allow_tunnel_mode) + mlx5_eswitch_unblock_encap(priv->mdev); +unblock_mode: + mlx5_eswitch_unblock_mode(priv->mdev); unblock_ipsec: mlx5_eswitch_unblock_ipsec(priv->mdev); err_xfrm: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 23703f28386a..5d7c15abfcaf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -319,7 +319,7 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry); int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry); void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry); void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry); -bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry); +bool mlx5e_ipsec_fs_tunnel_allowed(struct mlx5e_ipsec_sa_entry *sa_entry); int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 0bc080274584..bf1d2769d4f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2850,18 +2850,24 @@ void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry) memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry)); } -bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry) +bool mlx5e_ipsec_fs_tunnel_allowed(struct mlx5e_ipsec_sa_entry *sa_entry) { - struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; - struct mlx5e_ipsec_rx *rx; - struct mlx5e_ipsec_tx *tx; + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct xfrm_state *x = sa_entry->x; + bool from_fdb; - rx = ipsec_rx(sa_entry->ipsec, attrs->addrs.family, attrs->type); - tx = ipsec_tx(sa_entry->ipsec, attrs->type); - if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) - return tx->allow_tunnel_mode; + if (x->xso.dir == XFRM_DEV_OFFLOAD_OUT) { + struct mlx5e_ipsec_tx *tx = ipsec_tx(ipsec, x->xso.type); + + from_fdb = (tx == ipsec->tx_esw); + } else { + struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, x->props.family, + x->xso.type); + + from_fdb = (rx == ipsec->rx_esw); + } - return rx->allow_tunnel_mode; + return mlx5_eswitch_block_encap(ipsec->mdev, from_fdb); } void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *slave_priv, -- 2.31.1 From: Cosmin Ratiu PSP support requires a set of cap bits to be set, otherwise an init error is logged. But logging an error when PSP cannot be initialized is too much, and not in line with other features. If a feature cannot be initialized because it is not supported, that's not an error. An error should only be printed when the feature cannot be initialized because of an actual error. Fixes: 89ee2d92f66c ("net/mlx5e: Support PSP offload functionality") Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c index b4cb131c5f81..8565cfe8d7dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c @@ -893,27 +893,27 @@ int mlx5e_psp_init(struct mlx5e_priv *priv) if (!mlx5_is_psp_device(mdev)) { mlx5_core_dbg(mdev, "PSP offload not supported\n"); - return -EOPNOTSUPP; + return 0; } if (!MLX5_CAP_ETH(mdev, swp)) { mlx5_core_dbg(mdev, "SWP not supported\n"); - return -EOPNOTSUPP; + return 0; } if (!MLX5_CAP_ETH(mdev, swp_csum)) { mlx5_core_dbg(mdev, "SWP checksum not supported\n"); - return -EOPNOTSUPP; + return 0; } if (!MLX5_CAP_ETH(mdev, swp_csum_l4_partial)) { mlx5_core_dbg(mdev, "SWP L4 partial checksum not supported\n"); - return -EOPNOTSUPP; + return 0; } if (!MLX5_CAP_ETH(mdev, swp_lso)) { mlx5_core_dbg(mdev, "PSP LSO not supported\n"); - return -EOPNOTSUPP; + return 0; } psp = kzalloc(sizeof(*psp), GFP_KERNEL); -- 2.31.1