Introduce a new option ad_actor_port_prio, allowing users to set the actor port priority on a per-port basis in LACPDU. This priority can be used in future enhancements to influence aggregator selection via ad_select policy. Signed-off-by: Hangbin Liu --- Documentation/networking/bonding.rst | 9 +++++++ drivers/net/bonding/bond_3ad.c | 2 ++ drivers/net/bonding/bond_netlink.c | 16 +++++++++++++ drivers/net/bonding/bond_options.c | 35 ++++++++++++++++++++++++++++ include/net/bond_3ad.h | 1 + include/net/bond_options.h | 1 + include/uapi/linux/if_link.h | 1 + 7 files changed, 65 insertions(+) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index a4c1291d2561..5e105e7ac8e6 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -193,6 +193,15 @@ ad_actor_sys_prio This parameter has effect only in 802.3ad mode and is available through SysFs interface. +ad_actor_port_prio + + In an AD system, this specifies the port priority. The allowed range + is 1 - 65535. If the value is not specified, it takes 255 as the + default value. + + This parameter has effect only in 802.3ad mode and is available through + netlink interface. + ad_actor_system In an AD system, this specifies the mac-address for the actor in diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index c6807e473ab7..4a1b2f01fe37 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -436,6 +436,7 @@ static void __ad_actor_update_port(struct port *port) port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; port->actor_system_priority = BOND_AD_INFO(bond).system.sys_priority; + port->actor_port_priority = SLAVE_AD_INFO(port->slave)->port_priority; } /* Conversions */ @@ -2197,6 +2198,7 @@ void bond_3ad_bind_slave(struct slave *slave) port->actor_admin_port_key = bond->params.ad_user_port_key << 6; ad_update_actor_keys(port, false); /* actor system is the bond's system */ + SLAVE_AD_INFO(slave)->port_priority = port->actor_port_priority; __ad_actor_update_port(port); /* tx timer(to verify that no more than MAX_TX_IN_SECOND * lacpdu's are sent in one second) diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index ac5e402c34bc..ad91b93cbdac 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -28,6 +28,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev, nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_ACTOR_PORT_PRIO */ 0; } @@ -77,6 +78,10 @@ static int bond_fill_slave_info(struct sk_buff *skb, ad_port->partner_oper.port_state)) goto nla_put_failure; } + + if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_ACTOR_PORT_PRIO, + SLAVE_AD_INFO(slave)->port_priority)) + goto nla_put_failure; } return 0; @@ -129,6 +134,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 }, [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 }, + [IFLA_BOND_SLAVE_AD_ACTOR_PORT_PRIO] = { .type = NLA_U16 }, }; static int bond_validate(struct nlattr *tb[], struct nlattr *data[], @@ -179,6 +185,16 @@ static int bond_slave_changelink(struct net_device *bond_dev, return err; } + if (data[IFLA_BOND_SLAVE_AD_ACTOR_PORT_PRIO]) { + u16 ad_prio = nla_get_u16(data[IFLA_BOND_SLAVE_AD_ACTOR_PORT_PRIO]); + + bond_opt_slave_initval(&newval, &slave_dev, ad_prio); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_PORT_PRIO, &newval, + data[IFLA_BOND_SLAVE_AD_ACTOR_PORT_PRIO], extack); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 91893c29b899..2b8606b4e4f5 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -79,6 +79,8 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_ad_actor_port_prio_set(struct bonding *bond, + const struct bond_opt_value *newval); static int bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_user_port_key_set(struct bonding *bond, @@ -221,6 +223,12 @@ static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { { NULL, -1, 0}, }; +static const struct bond_opt_value bond_ad_actor_port_prio_tbl[] = { + { "minval", 1, BOND_VALFLAG_MIN}, + { "maxval", 65535, BOND_VALFLAG_MAX}, + { NULL, -1, 0}, +}; + static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, { "maxval", 1023, BOND_VALFLAG_MAX}, @@ -476,6 +484,13 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_ad_actor_sys_prio_tbl, .set = bond_option_ad_actor_sys_prio_set, }, + [BOND_OPT_AD_ACTOR_PORT_PRIO] = { + .id = BOND_OPT_AD_ACTOR_PORT_PRIO, + .name = "ad_actor_port_prio", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .values = bond_ad_actor_port_prio_tbl, + .set = bond_option_ad_actor_port_prio_set, + }, [BOND_OPT_AD_ACTOR_SYSTEM] = { .id = BOND_OPT_AD_ACTOR_SYSTEM, .name = "ad_actor_system", @@ -1793,6 +1808,26 @@ static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, return 0; } +static int bond_option_ad_actor_port_prio_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + struct slave *slave; + + slave = bond_slave_get_rtnl(newval->slave_dev); + if (!slave) { + netdev_dbg(bond->dev, "%s called on NULL slave\n", __func__); + return -ENODEV; + } + + netdev_dbg(newval->slave_dev, "Setting ad_actor_port_prio to %llu\n", + newval->value); + + SLAVE_AD_INFO(slave)->port_priority = newval->value; + bond_3ad_update_ad_actor_settings(bond); + + return 0; +} + static int bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index 2053cd8e788a..bf551ca70359 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -274,6 +274,7 @@ struct ad_slave_info { struct port port; /* 802.3ad port structure */ struct bond_3ad_stats stats; u16 id; + u16 port_priority; }; static inline const char *bond_3ad_churn_desc(churn_state_t state) diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 18687ccf0638..4aee1935e0e7 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -77,6 +77,7 @@ enum { BOND_OPT_NS_TARGETS, BOND_OPT_PRIO, BOND_OPT_COUPLED_CONTROL, + BOND_OPT_AD_ACTOR_PORT_PRIO, BOND_OPT_LAST }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3ad2d5d98034..79bcbbc264a7 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1562,6 +1562,7 @@ enum { IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, IFLA_BOND_SLAVE_PRIO, + IFLA_BOND_SLAVE_AD_ACTOR_PORT_PRIO, __IFLA_BOND_SLAVE_MAX, }; -- 2.46.0 Now that per-port actor priority is supported via ad_actor_port_prio, enable a new ad_select policy to choose the aggregator based on port priority. This allows users to influence aggregator selection by assigning higher or lower priorities to individual ports. Signed-off-by: Hangbin Liu --- Documentation/networking/bonding.rst | 9 ++++++++- drivers/net/bonding/bond_3ad.c | 29 ++++++++++++++++++++++++++++ drivers/net/bonding/bond_options.c | 1 + include/net/bond_3ad.h | 1 + 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 5e105e7ac8e6..a234ec12ece7 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -250,7 +250,14 @@ ad_select ports (slaves). Reselection occurs as described under the "bandwidth" setting, above. - The bandwidth and count selection policies permit failover of + prio or 3 + + The active aggregator is chosen by the highest total sum of + actor port priorities across its active ports. Note this + priority is ad_actor_port_prio, not per port prio, which is + used for primary reselect. + + The bandwidth, count and prio selection policies permit failover of 802.3ad aggregations when partial failure of the active aggregator occurs. This keeps the aggregator with the highest availability (either in bandwidth or in number of ports) active at all times. diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 4a1b2f01fe37..6f8a406ed34a 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -747,6 +747,20 @@ static int __agg_active_ports(struct aggregator *agg) return active; } +static unsigned int __agg_ports_priority(struct aggregator *agg) +{ + struct port *port; + unsigned int prio = 0; + + for (port = agg->lag_ports; port; + port = port->next_port_in_aggregator) { + if (port->is_enabled) + prio += port->actor_port_priority; + } + + return prio; +} + /** * __get_agg_bandwidth - get the total bandwidth of an aggregator * @aggregator: the aggregator we're looking at @@ -1695,6 +1709,9 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, * BOND_AD_COUNT: Select by count of ports. If count is equal, * select by bandwidth. * + * BOND_AD_PRIO: Select by total priority of ports. If priority + * is equal, select by count. + * * BOND_AD_STABLE, BOND_AD_BANDWIDTH: Select by bandwidth. */ if (!best) @@ -1713,6 +1730,14 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, return best; switch (__get_agg_selection_mode(curr->lag_ports)) { + case BOND_AD_PRIO: + if (__agg_ports_priority(curr) > __agg_ports_priority(best)) + return curr; + + if (__agg_ports_priority(curr) < __agg_ports_priority(best)) + return best; + + fallthrough; case BOND_AD_COUNT: if (__agg_active_ports(curr) > __agg_active_ports(best)) return curr; @@ -1778,6 +1803,10 @@ static int agg_device_up(const struct aggregator *agg) * (slaves), and reselect whenever a link state change takes place or the * set of slaves in the bond changes. * + * BOND_AD_PRIO: select the aggregator with highest total priority of ports + * (slaves), and reselect whenever a link state change takes place or the + * set of slaves in the bond changes. + * * FIXME: this function MUST be called with the first agg in the bond, or * __get_active_agg() won't work correctly. This function should be better * called with the bond itself, and retrieve the first agg from it. diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 2b8606b4e4f5..708ca1f18a00 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -163,6 +163,7 @@ static const struct bond_opt_value bond_ad_select_tbl[] = { { "stable", BOND_AD_STABLE, BOND_VALFLAG_DEFAULT}, { "bandwidth", BOND_AD_BANDWIDTH, 0}, { "count", BOND_AD_COUNT, 0}, + { "prio", BOND_AD_PRIO, 0}, { NULL, -1, 0}, }; diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index bf551ca70359..34495df965f0 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -26,6 +26,7 @@ enum { BOND_AD_STABLE = 0, BOND_AD_BANDWIDTH = 1, BOND_AD_COUNT = 2, + BOND_AD_PRIO = 3, }; /* rx machine states(43.4.11 in the 802.3ad standard) */ -- 2.46.0 Add a selftest to verify that per-port actor priority (ad_actor_port_prio) is correctly applied and affects aggregator selection as expected. Move cmd_jq from forwarding/lib.sh to net/lib.sh. Signed-off-by: Hangbin Liu --- .../selftests/drivers/net/bonding/Makefile | 3 +- .../drivers/net/bonding/bond_lacp_prio.sh | 73 +++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 24 ------ tools/testing/selftests/net/lib.sh | 24 ++++++ 4 files changed, 99 insertions(+), 25 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 2b10854e4b1e..32617a834a6b 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -10,7 +10,8 @@ TEST_PROGS := \ mode-2-recovery-updelay.sh \ bond_options.sh \ bond-eth-type-change.sh \ - bond_macvlan_ipvlan.sh + bond_macvlan_ipvlan.sh \ + bond_lacp_prio.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh new file mode 100755 index 000000000000..a3f939d12143 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing if bond lacp per port priority works + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh + +# create client, switch, backup switch netns +setup_ns c_ns s_ns b_ns +defer cleanup_all_ns + +# setup links +# shellcheck disable=SC2154 +ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" +ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" +# shellcheck disable=SC2154 +ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}" +ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}" + +ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 lacp_rate fast ad_select prio +ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 lacp_rate fast +ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 lacp_rate fast + +ip -n "${c_ns}" link set eth0 master bond0 +ip -n "${c_ns}" link set eth1 master bond0 +ip -n "${c_ns}" link set eth2 master bond0 +ip -n "${c_ns}" link set eth3 master bond0 +ip -n "${s_ns}" link set eth0 master bond0 +ip -n "${s_ns}" link set eth1 master bond0 +ip -n "${b_ns}" link set eth0 master bond0 +ip -n "${b_ns}" link set eth1 master bond0 + +ip -n "${c_ns}" link set bond0 up +ip -n "${s_ns}" link set bond0 up +ip -n "${b_ns}" link set bond0 up + +# set ad actor port priority, default 255 +ip -n "${c_ns}" link set eth0 type bond_slave ad_actor_port_prio 1000 +prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" ".[].linkinfo.info_slave_data.ad_actor_port_prio") +[ "$prio" -ne 1000 ] && RET=1 +ip -n "${c_ns}" link set eth2 type bond_slave ad_actor_port_prio 10 +prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" ".[].linkinfo.info_slave_data.ad_actor_port_prio") +[ "$prio" -ne 10 ] && RET=1 +log_test "bond 802.3ad" "ad_actor_port_prio setting" + +# Trigger link state change to reselect the aggregator +ip -n "${c_ns}" link set eth1 down +ip -n "${c_ns}" link set eth1 up +# the active agg should be connect to switch +bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" ".[].linkinfo.info_data.ad_info.aggregator") +eth0_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" ".[].linkinfo.info_slave_data.ad_aggregator_id") +if [ "${bond_agg_id}" -ne "${eth0_agg_id}" ]; then + RET=1 +fi + +# Change the actor port prio and re-test +ip -n "${c_ns}" link set eth0 type bond_slave ad_actor_port_prio 10 +ip -n "${c_ns}" link set eth2 type bond_slave ad_actor_port_prio 1000 +# Trigger link state change to reselect the aggregator +ip -n "${c_ns}" link set eth1 down +ip -n "${c_ns}" link set eth1 up +# now the active agg should be connect to backup switch +bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" ".[].linkinfo.info_data.ad_info.aggregator") +eth2_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" ".[].linkinfo.info_slave_data.ad_aggregator_id") +# shellcheck disable=SC2034 +if [ "${bond_agg_id}" -ne "${eth2_agg_id}" ]; then + RET=1 +fi +log_test "bond 802.3ad" "ad_actor_port_prio switch" + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 508f3c700d71..09b63c6f3dbd 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -551,30 +551,6 @@ wait_for_dev() fi } -cmd_jq() -{ - local cmd=$1 - local jq_exp=$2 - local jq_opts=$3 - local ret - local output - - output="$($cmd)" - # it the command fails, return error right away - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - output=$(echo $output | jq -r $jq_opts "$jq_exp") - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - echo $output - # return success only in case of non-empty output - [ ! -z "$output" ] -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 006fdadcc4b9..4c278829e04c 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -616,3 +616,27 @@ wait_local_port_listen() sleep 0.1 done } + +cmd_jq() +{ + local cmd=$1 + local jq_exp=$2 + local jq_opts=$3 + local ret + local output + + output="$($cmd)" + # it the command fails, return error right away + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output + # return success only in case of non-empty output + [ ! -z "$output" ] +} -- 2.46.0