This patch adds the bulk of the changes needed in order to support offloading of an upper bond device.

First of all, handling of the NETDEV_CHANGEUPPER and NETDEV_PRECHANGEUPPER events is extended so that the driver is capable of handling a switch port joining or leaving an upper bond device. All the restrictions around LAG offload support are added in the newly added dpaa2_switch_pre_lag_join() function.

The same events are extended to also detect when one of our upper bond devices changes its own upper device. In this case, for each lower device that is a DPAA2 switch port, the corresponding dpaa2_switch_port_[pre]changeupper() function will be called. This will start the process of joining the same FDB as the one used by the bridge device.

Setting the 'offload_fwd_mark' field on the skbs is also extended so that it is set up not only when the port is under a bridge, but also when it is under a bond device that is offloaded.

Signed-off-by: Ioana Ciornei
---
Changes in v2:
- Extend dpaa2_switch_prechangeupper_sanity_checks() with netdev_walk_all_lower_dev() so that checks are done on all lower devices of a bridge, even for the lowers of a bridged bond.
- Manage the default VLAN on bond join better.
- Clean up the error path in dpaa2_switch_port_bond_join().
- Call dpaa2_switch_port_bridge_leave() in case a port is leaving a bond which is also a bridged port.
- Update dpaa2_switch_port_bond_leave() so that in case of any failure the driver tries to clean up the LAG offload configuration.
- Call switchdev_bridge_port_unoffload() in case a switch port is leaving a bridged bond device.
--- .../ethernet/freescale/dpaa2/dpaa2-switch.c | 454 +++++++++++++++++- .../ethernet/freescale/dpaa2/dpaa2-switch.h | 14 +- 2 files changed, 461 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 52c1cb9cb7e0..88f52ac04c0a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -51,6 +51,17 @@ dpaa2_switch_filter_block_get_unused(struct ethsw_core *ethsw) return NULL; } +static struct dpaa2_switch_lag * +dpaa2_switch_lag_get_unused(struct ethsw_core *ethsw) +{ + int i; + + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) + if (!ethsw->lags[i].in_use) + return ðsw->lags[i]; + return NULL; +} + static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv, struct net_device *bridge_dev) { @@ -2157,15 +2168,30 @@ static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *net return 0; } +static int dpaa2_switch_check_dpsw_instance(struct net_device *dev, + struct netdev_nested_priv *priv) +{ + struct ethsw_port_priv *port_priv = (struct ethsw_port_priv *)priv->data; + struct ethsw_port_priv *other_priv = netdev_priv(dev); + + if (!dpaa2_switch_port_dev_check(dev)) + return 0; + + if (other_priv->ethsw_data == port_priv->ethsw_data) + return 0; + + return 1; +} + static int dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev, struct net_device *upper_dev, struct netlink_ext_ack *extack) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); - struct ethsw_port_priv *other_port_priv; - struct net_device *other_dev; - struct list_head *iter; + struct netdev_nested_priv data = { + .data = (void *)port_priv, + }; int err; if (!br_vlan_enabled(upper_dev)) { @@ -2180,6 +2206,70 @@ dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev, return 0; } + err = netdev_walk_all_lower_dev(upper_dev, + dpaa2_switch_check_dpsw_instance, + &data); + if (err) 
{ + NL_SET_ERR_MSG_MOD(extack, + "Interface from a different DPSW is in the bridge already"); + return -EINVAL; + } + + return 0; +} + +static int dpaa2_switch_pre_lag_join(struct net_device *netdev, + struct net_device *upper_dev, + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct ethsw_port_priv *other_port_priv; + struct dpaa2_switch_lag *lag = NULL; + struct dpsw_lag_cfg cfg = {0}; + struct net_device *other_dev; + int i, num_ifs = 0, err; + struct list_head *iter; + + if (!(ethsw->features & ETHSW_FEATURE_LAG_OFFLOAD)) { + NL_SET_ERR_MSG_MOD(extack, + "LAG offload is supported only for DPSW >= v8.13"); + return -EOPNOTSUPP; + } + + if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + NL_SET_ERR_MSG_MOD(extack, + "Can only offload LAG using hash TX type"); + return -EOPNOTSUPP; + } + + if (info->hash_type != NETDEV_LAG_HASH_L23) { + NL_SET_ERR_MSG_MOD(extack, "Can only offload L2+L3 Tx hash"); + return -EOPNOTSUPP; + } + + if (!dpaa2_switch_port_has_mac(port_priv)) { + NL_SET_ERR_MSG_MOD(extack, + "Only switch interfaces connected to MACs can be under a LAG"); + return -EINVAL; + } + + if (vlan_uses_dev(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot join a LAG upper that has a VLAN"); + return -EOPNOTSUPP; + } + + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { + if (!ethsw->lags[i].in_use) + continue; + if (ethsw->lags[i].bond_dev != upper_dev) + continue; + lag = ðsw->lags[i]; + break; + } + netdev_for_each_lower_dev(upper_dev, other_dev, iter) { if (!dpaa2_switch_port_dev_check(other_dev)) continue; @@ -2187,19 +2277,240 @@ dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev, other_port_priv = netdev_priv(other_dev); if (other_port_priv->ethsw_data != port_priv->ethsw_data) { NL_SET_ERR_MSG_MOD(extack, - "Interface from a different DPSW is in the bridge already"); + "Interface from a different DPSW 
is in the bond already"); + return -EINVAL; + } + + cfg.if_id[num_ifs++] = other_port_priv->idx; + + if (num_ifs >= DPSW_MAX_LAG_IFS) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot add more than 8 DPAA2 switch ports under the same bond"); return -EINVAL; } } + if (lag) { + cfg.group_id = lag->id; + cfg.if_id[num_ifs++] = port_priv->idx; + cfg.num_ifs = num_ifs; + cfg.phase = DPSW_LAG_SET_PHASE_CHECK; + + err = dpsw_lag_set(ethsw->mc_io, 0, ethsw->dpsw_handle, &cfg); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload LAG configuration"); + return -EOPNOTSUPP; + } + } + return 0; } +static void dpaa2_switch_port_set_lag_group(struct ethsw_port_priv *port_priv, + struct net_device *bond_dev) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct ethsw_port_priv *other_port_priv = NULL; + struct dpaa2_switch_lag *lag = NULL; + struct net_device *other_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(bond_dev, other_dev, iter) { + if (!dpaa2_switch_port_dev_check(other_dev)) + continue; + + other_port_priv = netdev_priv(other_dev); + if (!other_port_priv->lag) + continue; + + if (other_port_priv->lag->bond_dev == bond_dev) { + port_priv->lag = other_port_priv->lag; + return; + } + } + + /* This is the first interface to be added under a bond device. Find an + * unused LAG group. No need to check for NULL since there are the same + * amount of DPSW ports as LAG groups, meaning that each port can have + * its own LAG group. 
+ */ + lag = dpaa2_switch_lag_get_unused(ethsw); + lag->in_use = true; + lag->bond_dev = bond_dev; + port_priv->lag = lag; +} + +static int dpaa2_switch_set_lag_cfg(struct net_device *bond_dev, u8 lag_id, + struct ethsw_core *ethsw) +{ + struct dpaa2_switch_lag *lag = ðsw->lags[lag_id - 1]; + struct ethsw_port_priv *other_port_priv = NULL; + struct dpsw_lag_cfg cfg = {0}; + u8 num_ifs = 0; + int i; + + cfg.group_id = lag_id; + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { + other_port_priv = ethsw->ports[i]; + + if (!other_port_priv) + continue; + if (!other_port_priv->lag) + continue; + if (other_port_priv->lag->bond_dev != bond_dev) + continue; + + /* No need to check against DPSW_MAX_LAG_IFS since this + * was done in the prechangeupper stage. The flow will + * not reach this point in case there are more DPAA2 + * switch ports under the same bond than we can accept. + */ + cfg.if_id[num_ifs++] = other_port_priv->idx; + } + + cfg.num_ifs = num_ifs; + + /* No more interfaces under this LAG group, mark it as not in use */ + if (!num_ifs) { + lag->bond_dev = NULL; + lag->in_use = false; + } + + return dpsw_lag_set(ethsw->mc_io, 0, ethsw->dpsw_handle, &cfg); +} + +static int dpaa2_switch_port_bond_join(struct net_device *netdev, + struct net_device *bond_dev, + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_fdb *old_fdb = port_priv->fdb; + struct net_device *bridge_dev; + int err = 0; + u8 lag_id; + + /* Delete the default VLAN, we might change our FDB in this operation */ + err = dpaa2_switch_port_del_vlan(port_priv, DEFAULT_VLAN_ID); + if (err) + return err; + + /* Setup the egress flood policy (broadcast, unknown unicast) */ + dpaa2_switch_port_set_fdb(port_priv, bond_dev); + err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id); + if (err) + goto err_egress_flood; + + /* Recreate the 
egress flood domain of the FDB that we just left. */ + err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id); + if (err) + goto err_egress_flood; + + /* Setup the port_priv->lag pointer for this switch port */ + dpaa2_switch_port_set_lag_group(port_priv, bond_dev); + + /* Create the LAG configuration and apply it in MC */ + lag_id = port_priv->lag->id; + err = dpaa2_switch_set_lag_cfg(bond_dev, lag_id, ethsw); + if (err) + goto err_lag_cfg; + + err = dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID, + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_PVID); + if (err) + goto err_vlan_add; + + /* If the bond device is a switch port, then join the bridge as well */ + bridge_dev = netdev_master_upper_dev_get(bond_dev); + if (!bridge_dev || !netif_is_bridge_master(bridge_dev)) + return 0; + + err = dpaa2_switch_port_bridge_join(netdev, bridge_dev, extack); + if (err) + goto err_bridge_join; + + return err; + +err_bridge_join: + dpaa2_switch_port_del_vlan(port_priv, DEFAULT_VLAN_ID); +err_vlan_add: +err_lag_cfg: + port_priv->lag = NULL; + dpaa2_switch_set_lag_cfg(bond_dev, lag_id, ethsw); +err_egress_flood: + dpaa2_switch_port_set_fdb(port_priv, NULL); + dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID, + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_PVID); + return err; +} + +static int dpaa2_switch_port_bond_leave(struct net_device *netdev, + struct net_device *bond_dev) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct dpaa2_switch_fdb *old_fdb = port_priv->fdb; + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_lag *lag = port_priv->lag; + struct net_device *bridge_dev; + int err = 0; + + /* In case the bond is a bridge port, leave the upper bridge as well */ + bridge_dev = netdev_master_upper_dev_get(bond_dev); + if (bridge_dev && netif_is_bridge_master(bridge_dev)) { + err = dpaa2_switch_port_bridge_leave(netdev); + if (err) + goto lag_cleanup; + } + + /* Delete the default VLAN, we might change our FDB 
in this operation */ + err = dpaa2_switch_port_del_vlan(port_priv, DEFAULT_VLAN_ID); + if (err) + goto lag_cleanup; + + /* Setup the FDB for this port which is now standalone */ + dpaa2_switch_port_set_fdb(port_priv, NULL); + + /* Setup the egress flood policy (broadcast, unknown unicast). + * When the port is not under a bond, only the CTRL interface is part + * of the flooding domain besides the actual port. + */ + err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id); + if (err) + goto lag_cleanup; + + /* Recreate the egress flood domain of the FDB that we just left. */ + err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id); + if (err) + goto lag_cleanup; + + /* Add the VLAN 1 as PVID when not under a bond. We need this since + * the dpaa2 switch interfaces are not capable to be VLAN unaware + */ + err = dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID, + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_PVID); + if (err) + goto lag_cleanup; + +lag_cleanup: + /* Recreate the LAG configuration for the LAG group that we left. In + * case any step failed, at least we free up a LAG resource. 
+ */ + port_priv->lag = NULL; + dpaa2_switch_set_lag_cfg(bond_dev, lag->id, ethsw); + + return err; +} + static int dpaa2_switch_port_prechangeupper(struct net_device *netdev, struct netdev_notifier_changeupper_info *info) { + struct net_device *upper_dev, *br; struct netlink_ext_ack *extack; - struct net_device *upper_dev; int err; if (!dpaa2_switch_port_dev_check(netdev)) @@ -2216,6 +2527,23 @@ static int dpaa2_switch_port_prechangeupper(struct net_device *netdev, if (!info->linking) dpaa2_switch_port_pre_bridge_leave(netdev); + } else if (netif_is_lag_master(upper_dev)) { + if (!info->linking && netif_is_bridge_port(upper_dev)) { + dpaa2_switch_port_pre_bridge_leave(netdev); + return 0; + } + + if (netif_is_bridge_port(upper_dev)) { + br = netdev_master_upper_dev_get(upper_dev); + err = dpaa2_switch_prechangeupper_sanity_checks(netdev, + br, + extack); + if (err) + return err; + } + + return dpaa2_switch_pre_lag_join(netdev, upper_dev, + info->upper_info, extack); } return 0; @@ -2240,6 +2568,80 @@ static int dpaa2_switch_port_changeupper(struct net_device *netdev, extack); else return dpaa2_switch_port_bridge_leave(netdev); + } else if (netif_is_lag_master(upper_dev)) { + if (info->linking) + return dpaa2_switch_port_bond_join(netdev, upper_dev, + info->upper_info, + extack); + else + return dpaa2_switch_port_bond_leave(netdev, upper_dev); + } + + return 0; +} + +static int +dpaa2_switch_lag_prechangeupper(struct net_device *netdev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = 0; + + if (!netif_is_lag_master(netdev)) + return 0; + + netdev_for_each_lower_dev(netdev, lower, iter) { + if (!dpaa2_switch_port_dev_check(lower)) + continue; + + err = dpaa2_switch_port_prechangeupper(lower, info); + if (err) + return err; + } + + return err; +} + +static int +dpaa2_switch_lag_changeupper(struct net_device *netdev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + 
struct list_head *iter; + int err = 0; + + if (!netif_is_lag_master(netdev)) + return 0; + + netdev_for_each_lower_dev(netdev, lower, iter) { + if (!dpaa2_switch_port_dev_check(lower)) + continue; + + err = dpaa2_switch_port_changeupper(lower, info); + if (err) + return err; + } + + return 0; +} + +static int +dpaa2_switch_port_changelowerstate(struct net_device *netdev, + struct netdev_lag_lower_state_info *linfo) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + int err; + + if (!port_priv->lag) + return 0; + + err = dpsw_if_set_lag_state(ethsw->mc_io, 0, ethsw->dpsw_handle, + port_priv->idx, linfo->tx_enabled ? 1 : 0); + if (err) { + netdev_err(netdev, "dpsw_if_set_lag_state() = %d\n", err); + return err; } return 0; @@ -2249,6 +2651,7 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changelowerstate_info *info; int err = 0; switch (event) { @@ -2257,13 +2660,29 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb, if (err) return notifier_from_errno(err); + err = dpaa2_switch_lag_prechangeupper(netdev, ptr); + if (err) + return notifier_from_errno(err); + break; case NETDEV_CHANGEUPPER: err = dpaa2_switch_port_changeupper(netdev, ptr); if (err) return notifier_from_errno(err); + err = dpaa2_switch_lag_changeupper(netdev, ptr); + if (err) + return notifier_from_errno(err); + break; + case NETDEV_CHANGELOWERSTATE: + info = ptr; + if (!dpaa2_switch_port_dev_check(netdev)) + break; + + err = dpaa2_switch_port_changelowerstate(netdev, + info->lower_state_info); + return notifier_from_errno(err); } return NOTIFY_DONE; @@ -2500,8 +2919,11 @@ static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq, skb->dev = netdev; skb->protocol = eth_type_trans(skb, skb->dev); - /* Setup the offload_fwd_mark only if the port is under a bridge */ + /* 
Setup the offload_fwd_mark only if the port is under a bridge + * or under a bond device that is offloaded. + */ skb->offload_fwd_mark = !!(port_priv->fdb->bridge_dev); + skb->offload_fwd_mark |= !!(port_priv->lag); netif_receive_skb(skb); @@ -2517,6 +2939,9 @@ static void dpaa2_switch_detect_features(struct ethsw_core *ethsw) if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 6)) ethsw->features |= ETHSW_FEATURE_MAC_ADDR; + + if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 13)) + ethsw->features |= ETHSW_FEATURE_LAG_OFFLOAD; } static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw) @@ -3301,6 +3726,7 @@ static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev) kfree(ethsw->fdbs); kfree(ethsw->filter_blocks); kfree(ethsw->ports); + kfree(ethsw->lags); dpaa2_switch_teardown(sw_dev); @@ -3328,6 +3754,7 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw, port_priv = netdev_priv(port_netdev); port_priv->netdev = port_netdev; port_priv->ethsw_data = ethsw; + port_priv->lag = NULL; mutex_init(&port_priv->mac_lock); @@ -3435,6 +3862,19 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) goto err_free_fdbs; } + ethsw->lags = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->lags), + GFP_KERNEL); + if (!ethsw->lags) { + err = -ENOMEM; + goto err_free_filter; + } + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { + ethsw->lags[i].bond_dev = NULL; + ethsw->lags[i].ethsw = ethsw; + ethsw->lags[i].id = i + 1; + ethsw->lags[i].in_use = 0; + } + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { err = dpaa2_switch_probe_port(ethsw, i); if (err) @@ -3481,6 +3921,8 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) err_free_netdev: for (i--; i >= 0; i--) dpaa2_switch_remove_port(ethsw, i); + kfree(ethsw->lags); +err_free_filter: kfree(ethsw->filter_blocks); err_free_fdbs: kfree(ethsw->fdbs); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h index 
42b3ca73f55d..56debbdefd13 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h @@ -41,7 +41,8 @@ #define ETHSW_MAX_FRAME_LENGTH (DPAA2_MFL - VLAN_ETH_HLEN - ETH_FCS_LEN) #define ETHSW_L2_MAX_FRM(mtu) ((mtu) + VLAN_ETH_HLEN + ETH_FCS_LEN) -#define ETHSW_FEATURE_MAC_ADDR BIT(0) +#define ETHSW_FEATURE_MAC_ADDR BIT(0) +#define ETHSW_FEATURE_LAG_OFFLOAD BIT(1) /* Number of receive queues (one RX and one TX_CONF) */ #define DPAA2_SWITCH_RX_NUM_FQS 2 @@ -105,6 +106,13 @@ struct dpaa2_switch_fdb { bool in_use; }; +struct dpaa2_switch_lag { + struct ethsw_core *ethsw; + struct net_device *bond_dev; + bool in_use; + u8 id; +}; + struct dpaa2_switch_acl_entry { struct list_head list; u16 prio; @@ -163,6 +171,8 @@ struct ethsw_port_priv { struct dpaa2_mac *mac; /* Protects against changes to port_priv->mac */ struct mutex mac_lock; + + struct dpaa2_switch_lag *lag; }; /* Switch data */ @@ -190,6 +200,8 @@ struct ethsw_core { struct dpaa2_switch_fdb *fdbs; struct dpaa2_switch_filter_block *filter_blocks; u16 mirror_port; + + struct dpaa2_switch_lag *lags; }; static inline int dpaa2_switch_get_index(struct ethsw_core *ethsw, -- 2.25.1