From: Saeed Mahameed mlx5e_netdev_change_profile can fail to attach a new profile and can fail to rollback to old profile, in such case, we could end up with a dangling netdev with a fully reset netdev_priv. A retry to change profile, e.g. another attempt to call mlx5e_netdev_change_profile via switchdev mode change, will crash trying to access the now NULL priv->mdev. This fix allows mlx5e_netdev_change_profile() to handle previous failures and an empty priv, by not assuming priv is valid. Pass netdev and mdev to all flows requiring mlx5e_netdev_change_profile() and avoid passing priv. In mlx5e_netdev_change_profile() check if current priv is valid, and if not, just attach the new profile without trying to access the old one. This fixes the following oops, when enabling switchdev mode for the 2nd time after first time failure: ## Enabling switchdev mode first time: mlx5_core 0012:03:00.1: E-Switch: Supported tc chains and prios offload workqueue: Failed to create a rescuer kthread for wq "mlx5e": -EINTR mlx5_core 0012:03:00.1: mlx5e_netdev_init_profile:6214:(pid 37199): mlx5e_priv_init failed, err=-12 mlx5_core 0012:03:00.1 gpu3rdma1: mlx5e_netdev_change_profile: new profile init failed, -12 workqueue: Failed to create a rescuer kthread for wq "mlx5e": -EINTR mlx5_core 0012:03:00.1: mlx5e_netdev_init_profile:6214:(pid 37199): mlx5e_priv_init failed, err=-12 mlx5_core 0012:03:00.1 gpu3rdma1: mlx5e_netdev_change_profile: failed to rollback to orig profile, -12 ^^^^^^^^ mlx5_core 0000:00:03.0: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0) ## retry: Enabling switchdev mode 2nd time: mlx5_core 0000:00:03.0: E-Switch: Supported tc chains and prios offload BUG: kernel NULL pointer dereference, address: 0000000000000038 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 13 UID: 0 PID: 520 Comm: devlink Not tainted 6.18.0-rc4+ #91 PREEMPT(voluntary) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 RIP: 0010:mlx5e_detach_netdev+0x3c/0x90 Code: 50 00 00 f0 80 4f 78 02 48 8b bf e8 07 00 00 48 85 ff 74 16 48 8b 73 78 48 d1 ee 83 e6 01 83 f6 01 40 0f b6 f6 e8 c4 42 00 00 <48> 8b 45 38 48 85 c0 74 08 48 89 df e8 cc 47 40 1e 48 8b bb f0 07 RSP: 0018:ffffc90000673890 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8881036a89c0 RCX: 0000000000000000 RDX: ffff888113f63800 RSI: ffffffff822fe720 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000002dcd R09: 0000000000000000 R10: ffffc900006738e8 R11: 00000000ffffffff R12: 0000000000000000 R13: 0000000000000000 R14: ffff8881036a89c0 R15: 0000000000000000 FS: 00007fdfb8384740(0000) GS:ffff88856a9d6000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000038 CR3: 0000000112ae0005 CR4: 0000000000370ef0 Call Trace: mlx5e_netdev_change_profile+0x45/0xb0 mlx5e_vport_rep_load+0x27b/0x2d0 mlx5_esw_offloads_rep_load+0x72/0xf0 esw_offloads_enable+0x5d0/0x970 mlx5_eswitch_enable_locked+0x349/0x430 ? is_mp_supported+0x57/0xb0 mlx5_devlink_eswitch_mode_set+0x26b/0x430 devlink_nl_eswitch_set_doit+0x6f/0xf0 genl_family_rcv_msg_doit+0xe8/0x140 genl_rcv_msg+0x18b/0x290 ? __pfx_devlink_nl_pre_doit+0x10/0x10 ? __pfx_devlink_nl_eswitch_set_doit+0x10/0x10 ? __pfx_devlink_nl_post_doit+0x10/0x10 ? __pfx_genl_rcv_msg+0x10/0x10 netlink_rcv_skb+0x52/0x100 genl_rcv+0x28/0x40 netlink_unicast+0x282/0x3e0 ? __alloc_skb+0xd6/0x190 netlink_sendmsg+0x1f7/0x430 __sys_sendto+0x213/0x220 ? __sys_recvmsg+0x6a/0xd0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x50/0x1f0 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7fdfb8495047 Fixes: c4d7eb57687f ("net/mxl5e: Add change profile method") Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 9 ++-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 48 +++++++++++++------ .../net/ethernet/mellanox/mlx5/core/en_rep.c | 11 ++--- 3 files changed, 44 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 262dc032e276..f42256768700 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1243,9 +1243,12 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); -int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, - const struct mlx5e_profile *new_profile, void *new_ppriv); -void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); +int mlx5e_netdev_change_profile(struct net_device *netdev, + struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, + void *new_ppriv); +void mlx5e_netdev_attach_nic_profile(struct net_device *netdev, + struct mlx5_core_dev *mdev); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 07fc4d2c8fad..e50525b771bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -6584,19 +6584,28 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde return err; } -int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, - const struct mlx5e_profile *new_profile, void *new_ppriv) +int mlx5e_netdev_change_profile(struct net_device *netdev, + struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, + void *new_ppriv) { - const struct mlx5e_profile *orig_profile = priv->profile; - struct net_device *netdev = priv->netdev; - struct mlx5_core_dev *mdev = priv->mdev; - void *orig_ppriv = priv->ppriv; + struct mlx5e_priv *priv = netdev_priv(netdev); + const struct mlx5e_profile *orig_profile; int err, rollback_err; + void *orig_ppriv; - /* cleanup old profile */ - mlx5e_detach_netdev(priv); - priv->profile->cleanup(priv); - mlx5e_priv_cleanup(priv); + orig_profile = priv->profile; + orig_ppriv = priv->ppriv; + + /* NULL could happen if previous change_profile failed to rollback */ + if (priv->profile) { + WARN_ON_ONCE(priv->mdev != mdev); + /* cleanup old profile */ + mlx5e_detach_netdev(priv); + priv->profile->cleanup(priv); + mlx5e_priv_cleanup(priv); + } + /* priv members are not valid from this point ... */ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); @@ -6613,16 +6622,25 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, return 0; rollback: + if (!orig_profile) { + netdev_warn(netdev, "no original profile to rollback to\n"); + priv->profile = NULL; + return err; + } + rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv); - if (rollback_err) - netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n", - __func__, rollback_err); + if (rollback_err) { + netdev_err(netdev, "failed to rollback to orig profile, %d\n", + rollback_err); + priv->profile = NULL; + } return err; } -void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv) +void mlx5e_netdev_attach_nic_profile(struct net_device *netdev, + struct mlx5_core_dev *mdev) { - mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL); + mlx5e_netdev_change_profile(netdev, mdev, &mlx5e_nic_profile, NULL); } void mlx5e_destroy_netdev(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index ee9595109649..52d3ad0b9cd9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1508,17 +1508,16 @@ mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep * { struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); struct net_device *netdev; - struct mlx5e_priv *priv; int err; netdev = mlx5_uplink_netdev_get(dev); if (!netdev) return 0; - priv = netdev_priv(netdev); - rpriv->netdev = priv->netdev; - err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile, - rpriv); + /* must not use netdev_priv(netdev), it might not be initialized yet */ + rpriv->netdev = netdev; + err = mlx5e_netdev_change_profile(netdev, dev, + &mlx5e_uplink_rep_profile, rpriv); mlx5_uplink_netdev_put(dev, netdev); return err; } @@ -1546,7 +1545,7 @@ mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv) if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_SWITCH_LEGACY)) unregister_netdev(netdev); - mlx5e_netdev_attach_nic_profile(priv); + mlx5e_netdev_attach_nic_profile(netdev, priv->mdev); } static int -- 2.52.0