From: Alexandre Cassen A remote IPsec tunnel endpoint may refer to a network segment that is not directly connected to the host. In such a case, the IPsec tunnel endpoints are connected to a router and reachable via a routing path. In IPsec packet offload mode, the HW is initialized with the MAC addresses of both IPsec tunnel endpoints. Extend the current IPsec init MACs procedure to resolve the nexthop for routed networks. Direct neighbour lookup and probe are still used for directly connected networks, and as a fallback mechanism if the FIB lookup fails. Signed-off-by: Alexandre Cassen Signed-off-by: Leon Romanovsky Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan --- .../mellanox/mlx5/core/en_accel/ipsec.c | 82 ++++++++++++++++++- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 77f61cd28a79..a486684c8e60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "en.h" #include "eswitch.h" @@ -260,8 +261,14 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, { struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); struct net_device *netdev = sa_entry->dev; + struct mlx5e_ipsec_addr *addrs = &attrs->addrs; + struct xfrm_state *x = sa_entry->x; + struct dst_entry *rt_dst_entry; + struct flowi4 fl4 = {}; + struct flowi6 fl6 = {}; struct neighbour *n; u8 addr[ETH_ALEN]; + struct rtable *rt; const void *pkey; u8 *dst, *src; @@ -274,18 +281,89 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, case XFRM_DEV_OFFLOAD_IN: src = attrs->dmac; dst = attrs->smac; - pkey = &attrs->addrs.saddr.a4; + + switch (addrs->family) { + case AF_INET: + fl4.flowi4_proto = x->sel.proto; + fl4.daddr = addrs->saddr.a4; + fl4.saddr = addrs->daddr.a4; + pkey = &addrs->saddr.a4; + break; 
+ case AF_INET6: + fl6.flowi6_proto = x->sel.proto; + memcpy(fl6.daddr.s6_addr32, addrs->saddr.a6, 16); + memcpy(fl6.saddr.s6_addr32, addrs->daddr.a6, 16); + pkey = &addrs->saddr.a6; + break; + default: + return; + } break; case XFRM_DEV_OFFLOAD_OUT: src = attrs->smac; dst = attrs->dmac; - pkey = &attrs->addrs.daddr.a4; + switch (addrs->family) { + case AF_INET: + fl4.flowi4_proto = x->sel.proto; + fl4.daddr = addrs->daddr.a4; + fl4.saddr = addrs->saddr.a4; + pkey = &addrs->daddr.a4; + break; + case AF_INET6: + fl6.flowi6_proto = x->sel.proto; + memcpy(fl6.daddr.s6_addr32, addrs->daddr.a6, 16); + memcpy(fl6.saddr.s6_addr32, addrs->saddr.a6, 16); + pkey = &addrs->daddr.a6; + break; + default: + return; + } break; default: return; } ether_addr_copy(src, addr); + + /* Destination can refer to a routed network, so perform FIB lookup + * to resolve nexthop and get its MAC. Neighbour resolution is used as + * fallback. + */ + switch (addrs->family) { + case AF_INET: + rt = ip_route_output_key(dev_net(netdev), &fl4); + if (IS_ERR(rt)) + goto neigh; + + if (rt->rt_type != RTN_UNICAST) { + ip_rt_put(rt); + goto neigh; + } + rt_dst_entry = &rt->dst; + break; + case AF_INET6: + rt_dst_entry = ipv6_stub->ipv6_dst_lookup_flow( + dev_net(netdev), NULL, &fl6, NULL); + if (IS_ERR(rt_dst_entry)) + goto neigh; + break; + default: + return; + } + + n = dst_neigh_lookup(rt_dst_entry, pkey); + if (!n) { + dst_release(rt_dst_entry); + goto neigh; + } + + neigh_ha_snapshot(addr, n, netdev); + ether_addr_copy(dst, addr); + dst_release(rt_dst_entry); + neigh_release(n); + return; + +neigh: n = neigh_lookup(&arp_tbl, pkey, netdev); if (!n) { n = neigh_create(&arp_tbl, pkey, netdev); -- 2.31.1 From: Shay Drory When NR_CPUS is set to 8192 or higher, the current implementation that allocates struct irq_affinity_desc (and the CPU mask it holds) on the stack leads to a compiler warning about the frame size[1]. This patch addresses the issue by moving the allocation of struct irq_affinity_desc to the heap. 
[1] drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c: In function ‘irq_pool_request_irq’: drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c:70:1: warning: the frame size of 1048 bytes is larger than 1024 bytes [-Wframe-larger-than=] 70 | } | ^ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c: In function ‘mlx5_ctrl_irq_request’: drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:478:1: warning: the frame size of 1040 bytes is larger than 1024 bytes [-Wframe-larger-than=] 478 | } | ^ drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c: In function ‘mlx5_irq_request_vector’: drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c:597:1: warning: the frame size of 1040 bytes is larger than 1024 bytes [-Wframe-larger-than=] 597 | } | ^ drivers/net/ethernet/mellanox/mlx5/core/eq.c: In function ‘comp_irq_request_sf’: drivers/net/ethernet/mellanox/mlx5/core/eq.c:925:1: warning: the frame size of 1064 bytes is larger than 1024 bytes [-Wframe-larger-than=] 925 | } | ^ drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c: In function ‘irq_pool_request_irq’: drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c:74:1: warning: the frame size of 1048 bytes is larger than 1024 bytes [-Wframe-larger-than=] 74 | } | ^ Signed-off-by: Shay Drory Reported-by: Arnd Bergmann Closes: https://lore.kernel.org/all/20250620111010.3364606-1-arnd@kernel.org Reviewed-by: Maher Sanalla Reviewed-by: Moshe Shemesh Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 19 +++++++--- .../mellanox/mlx5/core/irq_affinity.c | 21 ++++++++--- .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 37 +++++++++++++------ 3 files changed, 53 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 66dce17219a6..779efc186255 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -876,19 +876,25 @@ static int 
comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_irq_pool *pool = mlx5_irq_table_get_comp_irq_pool(dev); struct mlx5_eq_table *table = dev->priv.eq_table; - struct irq_affinity_desc af_desc = {}; + struct irq_affinity_desc *af_desc; struct mlx5_irq *irq; /* In case SF irq pool does not exist, fallback to the PF irqs*/ if (!mlx5_irq_pool_is_sf_pool(pool)) return comp_irq_request_pci(dev, vecidx); - af_desc.is_managed = false; - cpumask_copy(&af_desc.mask, cpu_online_mask); - cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus); - irq = mlx5_irq_affinity_request(dev, pool, &af_desc); - if (IS_ERR(irq)) + af_desc = kzalloc(sizeof(*af_desc), GFP_KERNEL); + if (!af_desc) + return -ENOMEM; + + af_desc->is_managed = false; + cpumask_copy(&af_desc->mask, cpu_online_mask); + cpumask_andnot(&af_desc->mask, &af_desc->mask, &table->used_cpus); + irq = mlx5_irq_affinity_request(dev, pool, af_desc); + if (IS_ERR(irq)) { + kfree(af_desc); return PTR_ERR(irq); + } cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq)); mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", @@ -896,6 +902,7 @@ static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + kfree(af_desc); return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 2691d88cdee1..d0a845579d33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -47,29 +47,38 @@ static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, static struct mlx5_irq * irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { - struct irq_affinity_desc auto_desc = {}; + struct irq_affinity_desc *auto_desc; 
struct mlx5_irq *irq; u32 irq_index; int err; + auto_desc = kzalloc(sizeof(*auto_desc), GFP_KERNEL); + if (!auto_desc) + return ERR_PTR(-ENOMEM); + err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); - if (err) - return ERR_PTR(err); + if (err) { + irq = ERR_PTR(err); + goto out; + } if (pool->irqs_per_cpu) { if (cpumask_weight(&af_desc->mask) > 1) /* if req_mask contain more then one CPU, set the least loadad CPU * of req_mask */ cpumask_set_cpu(cpu_get_least_loaded(pool, &af_desc->mask), - &auto_desc.mask); + &auto_desc->mask); else cpu_get(pool, cpumask_first(&af_desc->mask)); } irq = mlx5_irq_alloc(pool, irq_index, - cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, - NULL); + cpumask_empty(&auto_desc->mask) ? + af_desc : auto_desc, NULL); if (IS_ERR(irq)) xa_erase(&pool->irqs, irq_index); + +out: + kfree(auto_desc); return irq; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 40024cfa3099..ac00aa29e61a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -470,26 +470,32 @@ void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq) struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) { struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev); - struct irq_affinity_desc af_desc; + struct irq_affinity_desc *af_desc; struct mlx5_irq *irq; - cpumask_copy(&af_desc.mask, cpu_online_mask); - af_desc.is_managed = false; + af_desc = kzalloc(sizeof(*af_desc), GFP_KERNEL); + if (!af_desc) + return ERR_PTR(-ENOMEM); + + cpumask_copy(&af_desc->mask, cpu_online_mask); + af_desc->is_managed = false; if (!mlx5_irq_pool_is_sf_pool(pool)) { /* In case we are allocating a control IRQ from a pci device's pool. * This can happen also for a SF if the SFs pool is empty. 
*/ if (!pool->xa_num_irqs.max) { - cpumask_clear(&af_desc.mask); + cpumask_clear(&af_desc->mask); /* In case we only have a single IRQ for PF/VF */ - cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask); + cpumask_set_cpu(cpumask_first(cpu_online_mask), + &af_desc->mask); } /* Allocate the IRQ in index 0. The vector was already allocated */ - irq = irq_pool_request_vector(pool, 0, &af_desc, NULL); + irq = irq_pool_request_vector(pool, 0, af_desc, NULL); } else { - irq = mlx5_irq_affinity_request(dev, pool, &af_desc); + irq = mlx5_irq_affinity_request(dev, pool, af_desc); } + kfree(af_desc); return irq; } @@ -548,16 +554,23 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, { struct mlx5_irq_table *table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = table->pcif_pool; - struct irq_affinity_desc af_desc; int offset = MLX5_IRQ_VEC_COMP_BASE; + struct irq_affinity_desc *af_desc; + struct mlx5_irq *irq; + + af_desc = kzalloc(sizeof(*af_desc), GFP_KERNEL); + if (!af_desc) + return ERR_PTR(-ENOMEM); if (!pool->xa_num_irqs.max) offset = 0; - af_desc.is_managed = false; - cpumask_clear(&af_desc.mask); - cpumask_set_cpu(cpu, &af_desc.mask); - return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap); + af_desc->is_managed = false; + cpumask_clear(&af_desc->mask); + cpumask_set_cpu(cpu, &af_desc->mask); + irq = mlx5_irq_request(dev, vecidx + offset, af_desc, rmap); + kfree(af_desc); + return irq; } static struct mlx5_irq_pool * -- 2.31.1 From: Feng Liu Underneath "TIS Config" tag expose TIS diagnostic information. Expose the tisn of each TC under each lag port. $ sudo devlink health diagnose auxiliary/mlx5_core.eth.2/131072 reporter tx ...... TIS Config: lag port: 0 tc: 0 tisn: 0 lag port: 1 tc: 0 tisn: 8 ...... 
Signed-off-by: Feng Liu Reviewed-by: Aya Levin Signed-off-by: Tariq Toukan --- .../mellanox/mlx5/core/en/reporter_tx.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index bd96988e102c..85d5cb39b107 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -311,6 +311,30 @@ mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporte mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static void +mlx5e_tx_reporter_diagnose_tis_config(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + u8 num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params); + u32 tc, i, tisn; + + devlink_fmsg_arr_pair_nest_start(fmsg, "TIS Config"); + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) { + for (tc = 0; tc < num_tc; tc++) { + tisn = mlx5e_profile_get_tisn(priv->mdev, priv, + priv->profile, i, tc); + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_u32_pair_put(fmsg, "lag port", i); + devlink_fmsg_u32_pair_put(fmsg, "tc", tc); + devlink_fmsg_u32_pair_put(fmsg, "tisn", tisn); + devlink_fmsg_obj_nest_end(fmsg); + } + } + devlink_fmsg_arr_pair_nest_end(fmsg); +} + static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, struct netlink_ext_ack *extack) @@ -326,6 +350,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, goto unlock; mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); + mlx5e_tx_reporter_diagnose_tis_config(reporter, fmsg); devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); for (i = 0; i < priv->channels.num; i++) { -- 2.31.1