From: Parav Pandit Introduce a new state to eswitch (active/inactive) and enable user to set it dynamically. A user can start the eswitch in switchdev mode in either active or inactive state. Active: Traffic is enabled on this eswitch FDB. Inactive: Traffic is ignored/dropped on this eswitch FDB. An example of starting the switch in active state is following. 1. devlink dev eswitch set pci/0000:08:00.1 mode switchdev (default is active, backward compatible) 2. devlink dev eswitch set pci/0000:08:00.1 mode switchdev state active To bring up the esw in 'inactive' state: devlink dev eswitch set pci/0000:08:00.1 mode switchdev state inactive Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed Signed-off-by: Adithya Jayachandran --- Documentation/netlink/specs/devlink.yaml | 13 ++++++++ .../devlink/devlink-eswitch-attr.rst | 15 ++++++++++ include/net/devlink.h | 5 ++++ include/uapi/linux/devlink.h | 7 +++++ net/devlink/dev.c | 30 +++++++++++++++++++ net/devlink/netlink_gen.c | 5 ++-- 6 files changed, 73 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 3db59c965869..4242a3431320 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -119,6 +119,14 @@ definitions: name: none - name: basic + - + type: enum + name: eswitch-state + entries: + - + name: none + - + name: basic - type: enum name: dpipe-header-id @@ -857,6 +865,10 @@ attribute-sets: name: health-reporter-burst-period type: u64 doc: Time (in msec) for recoveries before starting the grace period. + - + name: eswitch-state + type: u8 + enum: eswitch-state - name: dl-dev-stats subset-of: devlink @@ -1609,6 +1621,7 @@ operations: - eswitch-mode - eswitch-inline-mode - eswitch-encap-mode + - eswitch-state - name: eswitch-set diff --git a/Documentation/networking/devlink/devlink-eswitch-attr.rst b/Documentation/networking/devlink/devlink-eswitch-attr.rst index 08bb39ab1528..13ad1ed300ee 100644 --- a/Documentation/networking/devlink/devlink-eswitch-attr.rst +++ b/Documentation/networking/devlink/devlink-eswitch-attr.rst @@ -57,6 +57,18 @@ The following is a list of E-Switch attributes. * ``none`` Disable encapsulation support. * ``basic`` Enable encapsulation support. + * - ``state`` + - enum + - The state of the E-Switch. + In situations where the user want to bring up the e-switch, they want to + have the ability to block traffic towards the FDB until FDB is fully + programmed. + The state can be one of the following: + + * ``active`` Traffic is enabled on this eswitch FDB - default mode + * ``inactive`` Traffic is disabled on this eswitch FDB - no traffic + will be forwarded to/from this eswitch FDB + Example Usage ============= @@ -74,3 +86,6 @@ Example Usage # enable encap-mode with legacy mode $ devlink dev eswitch set pci/0000:08:00.0 mode legacy inline-mode none encap-mode basic + + # enable switchdev mode in inactive state + $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev state inactive diff --git a/include/net/devlink.h b/include/net/devlink.h index 8d4362f010e4..aca56a905ab8 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1369,6 +1369,11 @@ struct devlink_ops { int (*eswitch_encap_mode_set)(struct devlink *devlink, enum devlink_eswitch_encap_mode encap_mode, struct netlink_ext_ack *extack); + int (*eswitch_state_get)(struct devlink *devlink, + enum devlink_eswitch_state *state); + int (*eswitch_state_set)(struct devlink *devlink, + enum devlink_eswitch_state state, + struct netlink_ext_ack *extack); int (*info_get)(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack); /** diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index bcad11a787a5..a01443810658 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -195,6 +195,11 @@ enum devlink_eswitch_encap_mode { DEVLINK_ESWITCH_ENCAP_MODE_BASIC, }; +enum devlink_eswitch_state { + DEVLINK_ESWITCH_STATE_INACTIVE, + DEVLINK_ESWITCH_STATE_ACTIVE, +}; + enum devlink_port_flavour { DEVLINK_PORT_FLAVOUR_PHYSICAL, /* Any kind of a port physically * facing the user. @@ -638,6 +643,8 @@ enum devlink_attr { DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD, /* u64 */ + DEVLINK_ATTR_ESWITCH_STATE, /* u8 */ + /* Add new attributes above here, update the spec in * Documentation/netlink/specs/devlink.yaml and re-generate * net/devlink/netlink_gen.c. diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 02602704bdea..1eea3e2c1ade 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -672,6 +672,17 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure; } + if (ops->eswitch_state_get) { + enum devlink_eswitch_state state; + + err = ops->eswitch_state_get(devlink, &state); + if (err) + return err; + err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_STATE, state); + if (err) + return err; + } + genlmsg_end(msg, hdr); return 0; @@ -706,6 +717,7 @@ int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; enum devlink_eswitch_encap_mode encap_mode; + enum devlink_eswitch_state state; u8 inline_mode; int err = 0; u16 mode; @@ -722,6 +734,24 @@ int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) return err; } + state = DEVLINK_ESWITCH_STATE_ACTIVE; + if (info->attrs[DEVLINK_ATTR_ESWITCH_STATE]) { + if (!ops->eswitch_state_set) + return -EOPNOTSUPP; + state = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_STATE]); + } + /* If user did not supply the state attribute, the default is + * active state. If the state was not explicitly set, set the default + * state for drivers that support eswitch state. + * Keep this after mode-set as state handling can be dependent on + * the eswitch mode. + */ + if (ops->eswitch_state_set) { + err = ops->eswitch_state_set(devlink, state, info->extack); + if (err) + return err; + } + if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) { if (!ops->eswitch_inline_mode_set) return -EOPNOTSUPP; diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 9fd00977d59e..e0910fb2214d 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -226,12 +226,13 @@ static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_DEV_NA }; /* DEVLINK_CMD_ESWITCH_SET - do */ -static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_ENCAP_MODE + 1] = { +static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_STATE + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1), [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U8, 3), [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1), + [DEVLINK_ATTR_ESWITCH_STATE] = NLA_POLICY_MAX(NLA_U8, 1), }; /* DEVLINK_CMD_DPIPE_TABLE_GET - do */ @@ -822,7 +823,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_eswitch_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_eswitch_set_nl_policy, - .maxattr = DEVLINK_ATTR_ESWITCH_ENCAP_MODE, + .maxattr = DEVLINK_ATTR_ESWITCH_STATE, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { -- 2.51.0 From: Saeed Mahameed MPFS (Multi PF Switch) is enabled by default in Multi-Host environments, the driver keeps a list of desired unicast mac addresses of all vports (vfs/Sfs) and applied to HW via L2_table FW command. Add API to dynamically apply the list of MACs to HW when needed for next patches, to utilize this new API in devlink eswitch active/in-active uAPI. Signed-off-by: Saeed Mahameed Signed-off-by: Adithya Jayachandran Reviewed-by: Mark Bloch --- .../ethernet/mellanox/mlx5/core/lib/mpfs.c | 111 +++++++++++++++--- .../ethernet/mellanox/mlx5/core/lib/mpfs.h | 9 ++ 2 files changed, 103 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 4450091e181a..9230c31539fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -65,13 +65,14 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) /* UC L2 table hash node */ struct l2table_node { struct l2addr_node node; - u32 index; /* index in HW l2 table */ + int index; /* index in HW l2 table */ int ref_count; }; struct mlx5_mpfs { struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE]; struct mutex lock; /* Synchronize l2 table access */ + bool enabled; u32 size; unsigned long *bitmap; }; @@ -114,6 +115,8 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev) return -ENOMEM; } + mpfs->enabled = true; + dev->priv.mpfs = mpfs; return 0; } @@ -135,7 +138,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct l2table_node *l2addr; int err = 0; - u32 index; + int index; if (!mpfs) return 0; @@ -148,30 +151,32 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) goto out; } - err = alloc_l2table_index(mpfs, &index); - if (err) - goto out; - l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); if (!l2addr) { err = -ENOMEM; - goto hash_add_err; + goto out; } - err = set_l2table_entry_cmd(dev, index, mac); - if (err) - goto set_table_entry_err; + index = -1; + + if (mpfs->enabled) { + err = alloc_l2table_index(mpfs, &index); + if (err) + goto hash_del; + err = set_l2table_entry_cmd(dev, index, mac); + if (err) + goto free_l2table_index; + } l2addr->index = index; l2addr->ref_count = 1; mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); goto out; - -set_table_entry_err: - l2addr_hash_del(l2addr); -hash_add_err: +free_l2table_index: free_l2table_index(mpfs, index); +hash_del: + l2addr_hash_del(l2addr); out: mutex_unlock(&mpfs->lock); return err; @@ -183,7 +188,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct l2table_node *l2addr; int err = 0; - u32 index; + int index; if (!mpfs) return 0; @@ -200,12 +205,84 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) goto unlock; index = l2addr->index; - del_l2table_entry_cmd(dev, index); + if (index >= 0) { + del_l2table_entry_cmd(dev, index); + free_l2table_index(mpfs, index); + } l2addr_hash_del(l2addr); - free_l2table_index(mpfs, index); mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index); unlock: mutex_unlock(&mpfs->lock); return err; } EXPORT_SYMBOL(mlx5_mpfs_del_mac); + +int mlx5_mpfs_enable(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + struct hlist_node *n; + int err = 0; + + if (!mpfs) + return -ENODEV; + + mutex_lock(&mpfs->lock); + if (mpfs->enabled) + goto out; + mpfs->enabled = true; + mlx5_core_dbg(dev, "MPFS enabling mpfs\n"); + + mlx5_mpfs_foreach(l2addr, n, mpfs) { + u32 index; + + err = alloc_l2table_index(mpfs, &index); + if (err) { + mlx5_core_err(dev, "Failed to allocated MPFS index for %pM, err(%d)\n", + l2addr->node.addr, err); + goto out; + } + + err = set_l2table_entry_cmd(dev, index, l2addr->node.addr); + if (err) { + mlx5_core_err(dev, "Failed to set MPFS l2table entry for %pM index=%d, err(%d)\n", + l2addr->node.addr, index, err); + free_l2table_index(mpfs, index); + goto out; + } + + l2addr->index = index; + mlx5_core_dbg(dev, "MPFS entry %pM, set @index (%d)\n", + l2addr->node.addr, l2addr->index); + } +out: + mutex_unlock(&mpfs->lock); + return err; +} + +void mlx5_mpfs_disable(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + struct hlist_node *n; + + if (!mpfs) + return; + + mutex_lock(&mpfs->lock); + if (!mpfs->enabled) + goto unlock; + mlx5_mpfs_foreach(l2addr, n, mpfs) { + if (l2addr->index < 0) + continue; + del_l2table_entry_cmd(dev, l2addr->index); + free_l2table_index(mpfs, l2addr->index); + mlx5_core_dbg(dev, "MPFS entry %pM, deleted @index (%d)\n", + l2addr->node.addr, l2addr->index); + l2addr->index = -1; + } + mpfs->enabled = false; + mlx5_core_dbg(dev, "MPFS disabled\n"); +unlock: + mutex_unlock(&mpfs->lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h index 4a293542a7aa..866c94982e46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -45,6 +45,10 @@ struct l2addr_node { u8 addr[ETH_ALEN]; }; +#define mlx5_mpfs_foreach(hs, tmp, mpfs) \ + for (int j = 0; j < MLX5_L2_ADDR_HASH_SIZE; j++) \ + hlist_for_each_entry_safe(hs, tmp, &(mpfs)->hash[j], node.hlist) + #define for_each_l2hash_node(hn, tmp, hash, i) \ for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist) @@ -82,11 +86,16 @@ struct l2addr_node { }) #ifdef CONFIG_MLX5_MPFS +struct mlx5_core_dev; int mlx5_mpfs_init(struct mlx5_core_dev *dev); void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_mpfs_enable(struct mlx5_core_dev *dev); +void mlx5_mpfs_disable(struct mlx5_core_dev *dev); #else /* #ifndef CONFIG_MLX5_MPFS */ static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} +int mlx5_mpfs_enable(struct mlx5_core_dev *dev) { return 0; } +void mlx5_mpfs_disable(struct mlx5_core_dev *dev) {} #endif #endif -- 2.51.0 From: Saeed Mahameed Support eswitch state: - Active State: Allow FDB traffic, Connect adjacent vports and apply l2 mpfs rules. - Inactive / Deactivated State: Drop all traffic going to FDB, Remove mpfs l2 rules and disconnect adjacent vports. Signed-off-by: Saeed Mahameed Signed-off-by: Adithya Jayachandran Reviewed-by: Mark Bloch --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 2 + .../mellanox/mlx5/core/esw/adj_vport.c | 15 +- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 1 + .../net/ethernet/mellanox/mlx5/core/eswitch.h | 12 ++ .../mellanox/mlx5/core/eswitch_offloads.c | 157 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/fs_core.c | 5 + include/linux/mlx5/fs.h | 1 + 7 files changed, 182 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index a0b68321355a..32dbb11db94b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -371,6 +371,8 @@ static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, + .eswitch_state_get = mlx5_devlink_eswitch_state_get, + .eswitch_state_set = mlx5_devlink_eswitch_state_set, .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c index 0091ba697bae..250af09b5af2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c @@ -4,13 +4,8 @@ #include "fs_core.h" #include "eswitch.h" -enum { - MLX5_ADJ_VPORT_DISCONNECT = 0x0, - MLX5_ADJ_VPORT_CONNECT = 0x1, -}; - -static int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, - u16 vport, bool connect) +int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, u16 vport, + bool connect) { u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {}; @@ -24,7 +19,7 @@ static int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, MLX5_SET(modify_vport_state_in, in, egress_connect_valid, 1); MLX5_SET(modify_vport_state_in, in, ingress_connect, connect); MLX5_SET(modify_vport_state_in, in, egress_connect, connect); - + MLX5_SET(modify_vport_state_in, in, admin_state, connect); return mlx5_cmd_exec_in(dev, modify_vport_state, in); } @@ -96,7 +91,6 @@ static int mlx5_esw_adj_vport_create(struct mlx5_eswitch *esw, u16 vhca_id, if (err) goto acl_ns_remove; - mlx5_esw_adj_vport_modify(esw->dev, vport_num, MLX5_ADJ_VPORT_CONNECT); return 0; acl_ns_remove: @@ -117,8 +111,7 @@ static void mlx5_esw_adj_vport_destroy(struct mlx5_eswitch *esw, esw_debug(esw->dev, "Destroying adjacent vport %d for vhca_id 0x%x\n", vport_num, vport->vhca_id); - mlx5_esw_adj_vport_modify(esw->dev, vport_num, - MLX5_ADJ_VPORT_DISCONNECT); + mlx5_esw_offloads_rep_remove(esw, vport); mlx5_fs_vport_egress_acl_ns_remove(esw->dev->priv.steering, vport->index); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ad6858789e48..b22f270e4859 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2044,6 +2044,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) refcount_set(&esw->qos.refcnt, 0); esw->enabled_vports = 0; + esw->state = DEVLINK_ESWITCH_STATE_ACTIVE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4fe285ce32aa..5fd70bd8fb8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -264,6 +264,10 @@ struct mlx5_eswitch_fdb { struct offloads_fdb { struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *drop_root; + struct mlx5_flow_handle *drop_root_rule; + struct mlx5_fc *drop_root_counter; + struct dentry *drop_root_dbgfs; struct mlx5_flow_table *tc_miss_table; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; @@ -392,6 +396,7 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; u32 last_vport_idx; int mode; + u8 state; u16 manager_vport; u16 first_host_vport; u8 num_peers; @@ -569,6 +574,11 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, enum devlink_eswitch_encap_mode *encap); +int mlx5_devlink_eswitch_state_get(struct devlink *devlink, + enum devlink_eswitch_state *state); +int mlx5_devlink_eswitch_state_set(struct devlink *devlink, + enum devlink_eswitch_state state, + struct netlink_ext_ack *extack); int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack); @@ -633,6 +643,8 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); void mlx5_esw_adjacent_vhcas_setup(struct mlx5_eswitch *esw); void mlx5_esw_adjacent_vhcas_cleanup(struct mlx5_eswitch *esw); +int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, u16 vport, + bool connect); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f289e846ea3a..326f1e33799c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1577,6 +1577,7 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; attr.mapping = esw->offloads.reg_c0_obj_pool; + attr.fs_base_prio = FDB_BYPASS_PATH; chains = mlx5_chains_create(dev, &attr); if (IS_ERR(chains)) { @@ -2354,6 +2355,115 @@ static void esw_mode_change(struct mlx5_eswitch *esw, u16 mode) mlx5_devcom_comp_unlock(esw->dev->priv.hca_devcom_comp); } +static void mlx5_esw_fdb_drop_destroy(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.offloads.drop_root) + return; + + mlx5_del_flow_rules(esw->fdb_table.offloads.drop_root_rule); + mlx5_fc_destroy(esw->dev, esw->fdb_table.offloads.drop_root_counter); + mlx5_destroy_flow_table(esw->fdb_table.offloads.drop_root); + esw->fdb_table.offloads.drop_root_counter = NULL; + esw->fdb_table.offloads.drop_root_rule = NULL; + esw->fdb_table.offloads.drop_root = NULL; +} + +static int mlx5_esw_fdb_drop_create(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dst = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_table *table; + int err = 0; + + if (esw->fdb_table.offloads.drop_root) + return 0; + + root_ns = esw->fdb_table.offloads.ns; + + ft_attr.prio = FDB_DROP_ROOT; + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + table = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(table)) { + esw_warn(dev, "Failed to create fdb drop root table, err %ld\n", + PTR_ERR(table)); + return PTR_ERR(table); + } + + dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst.counter = mlx5_fc_create(dev, 0); + err = PTR_ERR_OR_ZERO(dst.counter); + if (err) { + esw_warn(dev, "Failed to create fdb drop counter, err %d\n", + err); + goto err_counter; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_rule = mlx5_add_flow_rules(table, NULL, &flow_act, &dst, 1); + err = PTR_ERR_OR_ZERO(flow_rule); + if (err) { + esw_warn(esw->dev, + "fs offloads: Failed to add vport rx drop rule err %d\n", + err); + goto err_flow_rule; + } + + esw->fdb_table.offloads.drop_root = table; + esw->fdb_table.offloads.drop_root_rule = flow_rule; + esw->fdb_table.offloads.drop_root_counter = dst.counter; + return 0; + +err_flow_rule: + mlx5_fc_destroy(dev, dst.counter); +err_counter: + mlx5_destroy_flow_table(table); + return err; +} + +static void mlx5_esw_fdb_active(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_fdb_drop_destroy(esw); + mlx5_mpfs_enable(esw->dev); + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->adjacent) + continue; + /* connect vport to this esw */ + mlx5_esw_adj_vport_modify(esw->dev, vport->vport, true); + } + + esw->state = DEVLINK_ESWITCH_STATE_ACTIVE; + esw_warn(esw->dev, "MPFS/FDB activated\n"); +} + +static void mlx5_esw_fdb_inactive(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_mpfs_disable(esw->dev); + mlx5_esw_fdb_drop_create(esw); + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->adjacent) + continue; + /* disconnect vport from this esw */ + mlx5_esw_adj_vport_modify(esw->dev, vport->vport, false); + } + + esw->state = DEVLINK_ESWITCH_STATE_INACTIVE; + esw_warn(esw->dev, "MPFS/FDB de-activated\n"); +} + static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { @@ -3656,6 +3766,10 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) { mlx5_eswitch_disable_pf_vf_vports(esw); mlx5_esw_offloads_rep_unload(esw, MLX5_VPORT_UPLINK); + + if (esw->state == DEVLINK_ESWITCH_STATE_INACTIVE) + mlx5_esw_fdb_active(esw); /* legacy mode always active */ + esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); mapping_destroy(esw->offloads.reg_c0_obj_pool); @@ -3851,6 +3965,49 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) return esw_mode_to_devlink(esw->mode, mode); } +int mlx5_devlink_eswitch_state_get(struct devlink *devlink, + enum devlink_eswitch_state *state) +{ + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + *state = esw->state; + return 0; +} + +int mlx5_devlink_eswitch_state_set(struct devlink *devlink, + enum devlink_eswitch_state state, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + + esw = mlx5_devlink_eswitch_get(devlink); + if (IS_ERR(esw)) { + NL_SET_ERR_MSG_MOD(extack, "Unable to query eswitch"); + return PTR_ERR(esw); + } + + if (esw->mode == MLX5_ESWITCH_LEGACY) { + if (state != DEVLINK_ESWITCH_STATE_ACTIVE) { + NL_SET_ERR_MSG_MOD(extack, + "legacy mode only supports active state"); + return -EOPNOTSUPP; + } + return 0; + } + + if (state == DEVLINK_ESWITCH_STATE_ACTIVE) + mlx5_esw_fdb_active(esw); + else + mlx5_esw_fdb_inactive(esw); + + esw->state = state; + return 0; +} + static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, struct netlink_ext_ack *extack) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4308e89802f3..c8cfcf939d08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3520,6 +3520,11 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) if (!steering->fdb_root_ns) return -ENOMEM; + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_DROP_ROOT, 1); + err = PTR_ERR_OR_ZERO(maj_prio); + if (err) + goto out_err; + err = create_fdb_bypass(steering); if (err) goto out_err; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 86055d55836d..f1fe56e7efdb 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -116,6 +116,7 @@ enum mlx5_flow_namespace_type { }; enum { + FDB_DROP_ROOT, FDB_BYPASS_PATH, FDB_CRYPTO_INGRESS, FDB_TC_OFFLOAD, -- 2.51.0