Add a selftest for PRP that performs a basic ping test on IPv4 and IPv6, over the plain PRP interface and a VLAN interface, similar to the existing ping test for HSR. The test first checks reachability of the other node, then checks for no loss and no duplicates. Signed-off-by: Felix Maurer Reviewed-by: Sebastian Andrzej Siewior --- tools/testing/selftests/net/hsr/Makefile | 1 + tools/testing/selftests/net/hsr/prp_ping.sh | 147 ++++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100755 tools/testing/selftests/net/hsr/prp_ping.sh diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile index 4b6afc0fe9f8..1886f345897a 100644 --- a/tools/testing/selftests/net/hsr/Makefile +++ b/tools/testing/selftests/net/hsr/Makefile @@ -5,6 +5,7 @@ top_srcdir = ../../../../.. TEST_PROGS := \ hsr_ping.sh \ hsr_redbox.sh \ + prp_ping.sh \ # end of TEST_PROGS TEST_FILES += hsr_common.sh diff --git a/tools/testing/selftests/net/hsr/prp_ping.sh b/tools/testing/selftests/net/hsr/prp_ping.sh new file mode 100755 index 000000000000..fd2ba9f05d4c --- /dev/null +++ b/tools/testing/selftests/net/hsr/prp_ping.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ipv6=true + +source ./hsr_common.sh + +optstring="h4" +usage() { + echo "Usage: $0 [OPTION]" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage "$0" + exit 0 + ;; + "4") + ipv6=false + ;; + "?") + usage "$0" + exit 1 + ;; +esac +done + +setup_prp_interfaces() +{ + echo "INFO: Preparing interfaces for PRP" +# Two PRP nodes, connected by two links (treated as LAN A and LAN B). +# +# vethA ----- vethA +# prp1 prp2 +# vethB ----- vethB +# +# node1 node2 + + # Interfaces + # shellcheck disable=SC2154 # variables assigned by setup_ns + ip link add vethA netns "$node1" type veth peer name vethA netns "$node2" + ip link add vethB netns "$node1" type veth peer name vethB netns "$node2" + + # MAC addresses will be copied from LAN A interface + ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA + ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA + + # PRP + ip -net "$node1" link add name prp1 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + ip -net "$node2" link add name prp2 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + + # IP addresses + ip -net "$node1" addr add 100.64.0.1/24 dev prp1 + ip -net "$node1" addr add dead:beef:0::1/64 dev prp1 nodad + ip -net "$node2" addr add 100.64.0.2/24 dev prp2 + ip -net "$node2" addr add dead:beef:0::2/64 dev prp2 nodad + + # All links up + ip -net "$node1" link set vethA up + ip -net "$node1" link set vethB up + ip -net "$node1" link set prp1 up + + ip -net "$node2" link set vethA up + ip -net "$node2" link set vethB up + ip -net "$node2" link set prp2 up +} + +setup_vlan_interfaces() +{ + # Interfaces + ip -net "$node1" link add link prp1 name prp1.2 type vlan id 2 + ip -net "$node2" link add link prp2 name prp2.2 type vlan id 2 + + # IP addresses + ip -net "$node1" addr add 100.64.2.1/24 dev prp1.2 + ip -net "$node1" addr add dead:beef:2::1/64 dev prp1.2 nodad + + ip -net "$node2" addr add 100.64.2.2/24 dev prp2.2 + ip -net "$node2" addr add dead:beef:2::2/64 dev prp2.2 nodad + + # All links up + ip -net "$node1" link set prp1.2 up + ip -net "$node2" link set prp2.2 up +} + +do_ping_tests() +{ + local netid="$1" + + echo "INFO: Initial validation ping" + + do_ping "$node1" "100.64.$netid.2" + do_ping "$node2" "100.64.$netid.1" + stop_if_error "Initial validation failed on IPv4" + + do_ping "$node1" "dead:beef:$netid::2" + do_ping "$node2" "dead:beef:$netid::1" + stop_if_error "Initial validation failed on IPv6" + + echo "INFO: Longer ping test." + + do_ping_long "$node1" "100.64.$netid.2" + do_ping_long "$node2" "100.64.$netid.1" + stop_if_error "Longer ping test failed on IPv4." + + do_ping_long "$node1" "dead:beef:$netid::2" + do_ping_long "$node2" "dead:beef:$netid::1" + stop_if_error "Longer ping test failed on IPv6." +} + +run_ping_tests() +{ + echo "INFO: Running ping tests" + do_ping_tests 0 +} + +run_vlan_ping_tests() +{ + vlan_challenged_prp1=$(ip net exec "$node1" ethtool -k prp1 | \ + grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_prp2=$(ip net exec "$node2" ethtool -k prp2 | \ + grep "vlan-challenged" | awk '{print $2}') + + if [[ "$vlan_challenged_prp1" = "off" || \ + "$vlan_challenged_prp2" = "off" ]]; then + echo "INFO: Running VLAN ping tests" + setup_vlan_interfaces + do_ping_tests 2 + else + echo "INFO: Not Running VLAN tests as the device does not support VLAN" + fi +} + +check_prerequisites +trap cleanup_all_ns EXIT + +setup_ns node1 node2 +setup_prp_interfaces + +run_ping_tests +run_vlan_ping_tests + +exit $ret -- 2.52.0 Previously the hsr_ping test only checked that all nodes in a VLAN are reachable (using do_ping). Update the test to also check that there is no packet loss and no duplicate packets by running the same tests for VLANs as without VLANs (including using do_ping_long). This also adds tests for IPv6 over VLAN. To unify the test code, the topology without VLANs now uses IP addresses from dead:beef:0::/64 to align with the 100.64.0.0/24 range for IPv4. Error messages are updated across the board to make it easier to find what actually failed. Also update the VLAN test to only run in VLAN 2, as there is no need to check if ping really works with VLAN IDs 2, 3, 4, and 5. This lowers the number of long ping tests on VLANs to keep the overall test runtime in bounds. It's still necessary to bump the test timeout a bit, though: a ping long tests takes 1sec, do_ping_tests performs 12 of them, do_link_problem_tests 6, and the VLAN tests again 12. With some buffer for setup and waiting and for two protocol versions, 90sec timeout seems reasonable. Signed-off-by: Felix Maurer Reviewed-by: Sebastian Andrzej Siewior --- tools/testing/selftests/net/hsr/hsr_ping.sh | 188 +++++++------------- tools/testing/selftests/net/hsr/settings | 2 +- 2 files changed, 70 insertions(+), 120 deletions(-) diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 5a65f4f836be..ebee4a18fc67 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -27,31 +27,34 @@ while getopts "$optstring" option;do esac done -do_complete_ping_test() +do_ping_tests() { - echo "INFO: Initial validation ping." - # Each node has to be able each one. - do_ping "$ns1" 100.64.0.2 - do_ping "$ns2" 100.64.0.1 - do_ping "$ns3" 100.64.0.1 - stop_if_error "Initial validation failed." - - do_ping "$ns1" 100.64.0.3 - do_ping "$ns2" 100.64.0.3 - do_ping "$ns3" 100.64.0.2 + local netid="$1" - do_ping "$ns1" dead:beef:1::2 - do_ping "$ns1" dead:beef:1::3 - do_ping "$ns2" dead:beef:1::1 - do_ping "$ns2" dead:beef:1::2 - do_ping "$ns3" dead:beef:1::1 - do_ping "$ns3" dead:beef:1::2 + echo "INFO: Running ping tests." - stop_if_error "Initial validation failed." + echo "INFO: Initial validation ping." + # Each node has to be able to reach each one. + do_ping "$ns1" "100.64.$netid.2" + do_ping "$ns1" "100.64.$netid.3" + do_ping "$ns2" "100.64.$netid.1" + do_ping "$ns2" "100.64.$netid.3" + do_ping "$ns3" "100.64.$netid.1" + do_ping "$ns3" "100.64.$netid.2" + stop_if_error "Initial validation failed on IPv4." + + do_ping "$ns1" "dead:beef:$netid::2" + do_ping "$ns1" "dead:beef:$netid::3" + do_ping "$ns2" "dead:beef:$netid::1" + do_ping "$ns2" "dead:beef:$netid::2" + do_ping "$ns3" "dead:beef:$netid::1" + do_ping "$ns3" "dead:beef:$netid::2" + stop_if_error "Initial validation failed on IPv6." # Wait until supervisor all supervision frames have been processed and the node # entries have been merged. Otherwise duplicate frames will be observed which is # valid at this stage. + echo "INFO: Wait for node table entries to be merged." WAIT=5 while [ ${WAIT} -gt 0 ] do @@ -68,24 +71,28 @@ do_complete_ping_test() sleep 1 echo "INFO: Longer ping test." - do_ping_long "$ns1" 100.64.0.2 - do_ping_long "$ns1" dead:beef:1::2 - do_ping_long "$ns1" 100.64.0.3 - do_ping_long "$ns1" dead:beef:1::3 - - stop_if_error "Longer ping test failed." - - do_ping_long "$ns2" 100.64.0.1 - do_ping_long "$ns2" dead:beef:1::1 - do_ping_long "$ns2" 100.64.0.3 - do_ping_long "$ns2" dead:beef:1::2 - stop_if_error "Longer ping test failed." + do_ping_long "$ns1" "100.64.$netid.2" + do_ping_long "$ns1" "dead:beef:$netid::2" + do_ping_long "$ns1" "100.64.$netid.3" + do_ping_long "$ns1" "dead:beef:$netid::3" + stop_if_error "Longer ping test failed (ns1)." + + do_ping_long "$ns2" "100.64.$netid.1" + do_ping_long "$ns2" "dead:beef:$netid::1" + do_ping_long "$ns2" "100.64.$netid.3" + do_ping_long "$ns2" "dead:beef:$netid::3" + stop_if_error "Longer ping test failed (ns2)." + + do_ping_long "$ns3" "100.64.$netid.1" + do_ping_long "$ns3" "dead:beef:$netid::1" + do_ping_long "$ns3" "100.64.$netid.2" + do_ping_long "$ns3" "dead:beef:$netid::2" + stop_if_error "Longer ping test failed (ns3)." +} - do_ping_long "$ns3" 100.64.0.1 - do_ping_long "$ns3" dead:beef:1::1 - do_ping_long "$ns3" 100.64.0.2 - do_ping_long "$ns3" dead:beef:1::2 - stop_if_error "Longer ping test failed." +do_link_problem_tests() +{ + echo "INFO: Running link problem tests." echo "INFO: Cutting one link." do_ping_long "$ns1" 100.64.0.3 & @@ -104,26 +111,22 @@ do_complete_ping_test() do_ping_long "$ns1" 100.64.0.2 do_ping_long "$ns1" 100.64.0.3 - - stop_if_error "Failed with delay and packetloss." + stop_if_error "Failed with delay and packetloss (ns1)." do_ping_long "$ns2" 100.64.0.1 do_ping_long "$ns2" 100.64.0.3 - - stop_if_error "Failed with delay and packetloss." + stop_if_error "Failed with delay and packetloss (ns2)." do_ping_long "$ns3" 100.64.0.1 do_ping_long "$ns3" 100.64.0.2 - stop_if_error "Failed with delay and packetloss." - - echo "INFO: All good." + stop_if_error "Failed with delay and packetloss (ns3)." } setup_hsr_interfaces() { local HSRv="$1" - echo "INFO: preparing interfaces for HSRv${HSRv}." + echo "INFO: Preparing interfaces for HSRv${HSRv}." # Three HSR nodes. Each node has one link to each of its neighbour, two links in total. # # ns1eth1 ----- ns2eth1 @@ -140,17 +143,20 @@ setup_hsr_interfaces() ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" # HSRv0/1 - ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0 - ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0 - ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 \ + slave2 ns1eth2 supervision 45 version "$HSRv" proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 \ + slave2 ns2eth2 supervision 45 version "$HSRv" proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 \ + slave2 ns3eth2 supervision 45 version "$HSRv" proto 0 # IP for HSR ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 - ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad + ip -net "$ns1" addr add dead:beef:0::1/64 dev hsr1 nodad ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 - ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad + ip -net "$ns2" addr add dead:beef:0::2/64 dev hsr2 nodad ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 - ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + ip -net "$ns3" addr add dead:beef:0::3/64 dev hsr3 nodad ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 @@ -177,85 +183,33 @@ setup_hsr_interfaces() setup_vlan_interfaces() { ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2 - ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3 - ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4 - ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5 - ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2 - ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3 - ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4 - ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5 - ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2 - ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3 - ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4 - ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5 ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2 - ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3 - ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4 - ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5 + ip -net "$ns1" addr add dead:beef:2::1/64 dev hsr1.2 nodad ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2 - ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3 - ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4 - ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5 + ip -net "$ns2" addr add dead:beef:2::2/64 dev hsr2.2 nodad ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2 - ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3 - ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4 - ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5 + ip -net "$ns3" addr add dead:beef:2::3/64 dev hsr3.2 nodad ip -net "$ns1" link set dev hsr1.2 up - ip -net "$ns1" link set dev hsr1.3 up - ip -net "$ns1" link set dev hsr1.4 up - ip -net "$ns1" link set dev hsr1.5 up - ip -net "$ns2" link set dev hsr2.2 up - ip -net "$ns2" link set dev hsr2.3 up - ip -net "$ns2" link set dev hsr2.4 up - ip -net "$ns2" link set dev hsr2.5 up - ip -net "$ns3" link set dev hsr3.2 up - ip -net "$ns3" link set dev hsr3.3 up - ip -net "$ns3" link set dev hsr3.4 up - ip -net "$ns3" link set dev hsr3.5 up } -hsr_vlan_ping() { - do_ping "$ns1" 100.64.2.2 - do_ping "$ns1" 100.64.3.2 - do_ping "$ns1" 100.64.4.2 - do_ping "$ns1" 100.64.5.2 - - do_ping "$ns1" 100.64.2.3 - do_ping "$ns1" 100.64.3.3 - do_ping "$ns1" 100.64.4.3 - do_ping "$ns1" 100.64.5.3 - - do_ping "$ns2" 100.64.2.1 - do_ping "$ns2" 100.64.3.1 - do_ping "$ns2" 100.64.4.1 - do_ping "$ns2" 100.64.5.1 - - do_ping "$ns2" 100.64.2.3 - do_ping "$ns2" 100.64.3.3 - do_ping "$ns2" 100.64.4.3 - do_ping "$ns2" 100.64.5.3 - - do_ping "$ns3" 100.64.2.1 - do_ping "$ns3" 100.64.3.1 - do_ping "$ns3" 100.64.4.1 - do_ping "$ns3" 100.64.5.1 - - do_ping "$ns3" 100.64.2.2 - do_ping "$ns3" 100.64.3.2 - do_ping "$ns3" 100.64.4.2 - do_ping "$ns3" 100.64.5.2 +run_complete_ping_tests() +{ + echo "INFO: Running complete ping tests." + do_ping_tests 0 + do_link_problem_tests } -run_vlan_tests() { +run_vlan_tests() +{ vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}') vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}') vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}') @@ -263,27 +217,23 @@ run_vlan_tests() { if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then echo "INFO: Running VLAN tests" setup_vlan_interfaces - hsr_vlan_ping + do_ping_tests 2 else echo "INFO: Not Running VLAN tests as the device does not support VLAN" fi } check_prerequisites -setup_ns ns1 ns2 ns3 - trap cleanup_all_ns EXIT +setup_ns ns1 ns2 ns3 setup_hsr_interfaces 0 -do_complete_ping_test - +run_complete_ping_tests run_vlan_tests setup_ns ns1 ns2 ns3 - setup_hsr_interfaces 1 -do_complete_ping_test - +run_complete_ping_tests run_vlan_tests exit $ret diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings index 0fbc037f2aa8..ba4d85f74cd6 100644 --- a/tools/testing/selftests/net/hsr/settings +++ b/tools/testing/selftests/net/hsr/settings @@ -1 +1 @@ -timeout=50 +timeout=90 -- 2.52.0 Add a test case that can support different types of faulty links for all protocol versions (HSRv0, HSRv1, PRPv1). It starts with a baseline with fully functional links. The first faulty case is one link being cut during the ping. This test uses a different function for ping that sends more packets in shorter intervals to stress the duplicate detection algorithms a bit more and allow for future tests with other link faults (packet loss, reordering, etc.). As the link fault tests now cover the cut link for HSR and PRP, it can be removed from the hsr_ping test. Note that the removed cut link test did not really test the fault because do_ping_long takes about 1sec while the link is only cut after a 3sec sleep. Signed-off-by: Felix Maurer Reviewed-by: Sebastian Andrzej Siewior --- tools/testing/selftests/net/hsr/Makefile | 1 + tools/testing/selftests/net/hsr/hsr_ping.sh | 11 - .../testing/selftests/net/hsr/link_faults.sh | 271 ++++++++++++++++++ 3 files changed, 272 insertions(+), 11 deletions(-) create mode 100755 tools/testing/selftests/net/hsr/link_faults.sh diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile index 1886f345897a..31fb9326cf53 100644 --- a/tools/testing/selftests/net/hsr/Makefile +++ b/tools/testing/selftests/net/hsr/Makefile @@ -5,6 +5,7 @@ top_srcdir = ../../../../.. TEST_PROGS := \ hsr_ping.sh \ hsr_redbox.sh \ + link_faults.sh \ prp_ping.sh \ # end of TEST_PROGS diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index ebee4a18fc67..0ec71b20ab75 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -94,17 +94,6 @@ do_link_problem_tests() { echo "INFO: Running link problem tests." - echo "INFO: Cutting one link." - do_ping_long "$ns1" 100.64.0.3 & - - sleep 3 - ip -net "$ns3" link set ns3eth1 down - wait - - ip -net "$ns3" link set ns3eth1 up - - stop_if_error "Failed with one link down." - echo "INFO: Delay the link and drop a few packages." tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% diff --git a/tools/testing/selftests/net/hsr/link_faults.sh b/tools/testing/selftests/net/hsr/link_faults.sh new file mode 100755 index 000000000000..7ff14dcd32e7 --- /dev/null +++ b/tools/testing/selftests/net/hsr/link_faults.sh @@ -0,0 +1,271 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2329 + +source ../lib.sh + +ALL_TESTS=" + test_clean_hsrv0 + test_cut_link_hsrv0 + test_clean_hsrv1 + test_cut_link_hsrv1 + test_clean_prp + test_cut_link_prp +" + +# The tests are running ping for 5sec with a relatively short interval with a +# cut link, which should be recoverable by HSR/PRP. + +setup_hsr_topo() +{ + # Three HSR nodes in a ring, every node has a LAN A interface connected + # to the LAN B interface of the next node. + # + # node1 node2 + # + # vethA -------- vethB + # hsr1 hsr2 + # vethB vethA + # \ / + # vethA vethB + # hsr3 + # + # node3 + + local ver="$1" + + setup_ns node1 node2 node3 + + # veth links + # shellcheck disable=SC2154 # variables assigned by setup_ns + ip link add vethA netns "$node1" type veth peer name vethB netns "$node2" + # shellcheck disable=SC2154 # variables assigned by setup_ns + ip link add vethA netns "$node2" type veth peer name vethB netns "$node3" + ip link add vethA netns "$node3" type veth peer name vethB netns "$node1" + + # MAC addresses (not needed for HSR operation, but helps with debugging) + ip -net "$node1" link set address 00:11:22:00:01:01 dev vethA + ip -net "$node1" link set address 00:11:22:00:01:02 dev vethB + + ip -net "$node2" link set address 00:11:22:00:02:01 dev vethA + ip -net "$node2" link set address 00:11:22:00:02:02 dev vethB + + ip -net "$node3" link set address 00:11:22:00:03:01 dev vethA + ip -net "$node3" link set address 00:11:22:00:03:02 dev vethB + + # HSR interfaces + ip -net "$node1" link add name hsr1 type hsr proto 0 version "$ver" \ + slave1 vethA slave2 vethB supervision 45 + ip -net "$node2" link add name hsr2 type hsr proto 0 version "$ver" \ + slave1 vethA slave2 vethB supervision 45 + ip -net "$node3" link add name hsr3 type hsr proto 0 version "$ver" \ + slave1 vethA slave2 vethB supervision 45 + + # IP addresses + ip -net "$node1" addr add 100.64.0.1/24 dev hsr1 + ip -net "$node2" addr add 100.64.0.2/24 dev hsr2 + ip -net "$node3" addr add 100.64.0.3/24 dev hsr3 + + # Set all links up + ip -net "$node1" link set vethA up + ip -net "$node1" link set vethB up + ip -net "$node1" link set hsr1 up + + ip -net "$node2" link set vethA up + ip -net "$node2" link set vethB up + ip -net "$node2" link set hsr2 up + + ip -net "$node3" link set vethA up + ip -net "$node3" link set vethB up + ip -net "$node3" link set hsr3 up +} + +setup_prp_topo() +{ + # Two PRP nodes, connected by two links (treated as LAN A and LAN B). + # + # vethA ----- vethA + # prp1 prp2 + # vethB ----- vethB + # + # node1 node2 + + setup_ns node1 node2 + + # veth links + ip link add vethA netns "$node1" type veth peer name vethA netns "$node2" + ip link add vethB netns "$node1" type veth peer name vethB netns "$node2" + + # MAC addresses will be copied from LAN A interface + ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA + ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA + + # PRP interfaces + ip -net "$node1" link add name prp1 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + ip -net "$node2" link add name prp2 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + + # IP addresses + ip -net "$node1" addr add 100.64.0.1/24 dev prp1 + ip -net "$node2" addr add 100.64.0.2/24 dev prp2 + + # All links up + ip -net "$node1" link set vethA up + ip -net "$node1" link set vethB up + ip -net "$node1" link set prp1 up + + ip -net "$node2" link set vethA up + ip -net "$node2" link set vethB up + ip -net "$node2" link set prp2 up +} + +wait_for_hsr_node_table() +{ + log_info "Wait for node table entries to be merged." + WAIT=5 + while [ "${WAIT}" -gt 0 ]; do + nts=$(cat /sys/kernel/debug/hsr/hsr*/node_table) + + # We need entries in the node tables, and they need to be merged + if (echo "$nts" | grep -qE "^([0-9a-f]{2}:){5}") && \ + ! (echo "$nts" | grep -q "00:00:00:00:00:00"); then + return + fi + + sleep 1 + ((WAIT--)) + done + check_err 1 "Failed to wait for merged node table entries" +} + +setup_topo() +{ + local proto="$1" + + if [ "$proto" = "HSRv0" ]; then + setup_hsr_topo 0 + wait_for_hsr_node_table + elif [ "$proto" = "HSRv1" ]; then + setup_hsr_topo 1 + wait_for_hsr_node_table + elif [ "$proto" = "PRP" ]; then + setup_prp_topo + else + check_err 1 "Unknown protocol (${proto})" + fi +} + +check_ping() +{ + local node="$1" + local dst="$2" + local ping_args="-q -i 0.01 -c 400" + + log_info "Running ping $node -> $dst" + # shellcheck disable=SC2086 + output=$(ip netns exec "$node" ping $ping_args "$dst" | \ + grep "packets transmitted") + log_info "$output" + + dups=0 + loss=0 + + if [[ "$output" =~ \+([0-9]+)" duplicates" ]]; then + dups="${BASH_REMATCH[1]}" + fi + if [[ "$output" =~ ([0-9\.]+\%)" packet loss" ]]; then + loss="${BASH_REMATCH[1]}" + fi + + check_err "$dups" "Unexpected duplicate packets (${dups})" + if [ "$loss" != "0%" ]; then + check_err 1 "Unexpected packet loss (${loss})" + fi +} + +test_clean() +{ + local proto="$1" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + check_ping "$node1" "100.64.0.2" + + log_test "${tname}" +} + +test_clean_hsrv0() +{ + test_clean "HSRv0" +} + +test_clean_hsrv1() +{ + test_clean "HSRv1" +} + +test_clean_prp() +{ + test_clean "PRP" +} + +test_cut_link() +{ + local proto="$1" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + # Cutting link from subshell, so check_ping can run in the normal shell + # with access to global variables from the test harness. + ( + sleep 2 + log_info "Cutting link" + ip -net "$node1" link set vethB down + ) & + check_ping "$node1" "100.64.0.2" + + wait + log_test "${tname}" +} + + +test_cut_link_hsrv0() +{ + test_cut_link "HSRv0" +} + +test_cut_link_hsrv1() +{ + test_cut_link "HSRv1" +} + +test_cut_link_prp() +{ + test_cut_link "PRP" +} + +cleanup() +{ + cleanup_all_ns +} + +trap cleanup EXIT + +tests_run + +exit $EXIT_STATUS -- 2.52.0 The PRP duplicate discard algorithm does not work reliably with certain link faults. Especially with packet loss on one link, the duplicate discard algorithm drops valid packets which leads to packet loss on the PRP interface where the link fault should in theory be perfectly recoverable by PRP. This happens because the algorithm opens the drop window on the lossy link, covering received and lost sequence numbers. If the other, non-lossy link receives the duplicate for a lost frame, it is within the drop window of the lossy link and therefore dropped. Since IEC 62439-3:2012, a node has one sequence number counter for frames it sends, instead of one sequence number counter for each destination. Therefore, a node can not expect to receive contiguous sequence numbers from a sender. A missing sequence number can be totally normal (if the sender intermittently communicates with another node) or mean a frame was lost. The algorithm, as previously implemented in commit 05fd00e5e7b1 ("net: hsr: Fix PRP duplicate detection"), was part of IEC 62439-3:2010 (HSRv0/PRPv0) but was removed with IEC 62439-3:2012 (HSRv1/PRPv1). Since that, no algorithm is specified but up to implementers. It should be "designed such that it never rejects a legitimate frame, while occasional acceptance of a duplicate can be tolerated" (IEC 62439-3:2021). For the duplicate discard algorithm, this means that 1) we need to track the sequence numbers individually to account for non-contiguous sequence numbers, and 2) we should always err on the side of accepting a duplicate than dropping a valid frame. The idea of the new algorithm is to store the seen sequence numbers in a bitmap. To keep the size of the bitmap in control, we store it as a "sparse bitmap" where the bitmap is split into blocks and not all blocks exist at the same time. The sparse bitmap is implemented using an xarray that keeps the references to the individual blocks and a backing ring buffer that stores the actual blocks. New blocks are initialized in the buffer and added to the xarray as needed when new frames arrive. Existing blocks are removed in two conditions: 1. The block found for an arriving sequence number is old and therefore not relevant to the duplicate discard algorithm anymore, i.e., it has been added more than the entry forget time ago. In this case, the block is removed from the xarray and marked as forgotten (by setting its timestamp to 0). 2. Space is needed in the ring buffer for a new block. In this case, the block is removed from the xarray, if it hasn't already been forgotten (by 1.). Afterwards, the new block is initialized in its place. This has the nice property that we can reliably track sequence numbers on low traffic situations (where they expire based on their timestamp) and more quickly forget sequence numbers in high traffic situations before they potentially wrap over and repeat before they are expired. When nodes are merged, the blocks are merged as well. The timestamp of a merged block is set to the minimum of the two timestamps to never keep around a seen sequence number for too long. The bitmaps are or'd to mark all seen sequence numbers as seen. All of this still happens under seq_out_lock, to prevent concurrent access to the blocks. The KUnit test for the algorithm is updated as well. The updates are done in a way to match the original intends pretty closely. Currently, there is much knowledge about the actual algorithm baked into the tests (especially the expectations) which may need some redesign in the future. Reported-by: Steffen Lindner Fixes: 05fd00e5e7b1 ("net: hsr: Fix PRP duplicate detection") Signed-off-by: Felix Maurer Tested-by: Steffen Lindner --- Notes: The bug was reported by Steffen directly to us, I asked for permission to add his Reported-by tag. net/hsr/hsr_framereg.c | 212 +++++++++++++++++++++------------ net/hsr/hsr_framereg.h | 25 +++- net/hsr/prp_dup_discard_test.c | 138 +++++++++++---------- 3 files changed, 237 insertions(+), 138 deletions(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 3a2a2fa7a0a3..d1052c23575e 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -11,6 +11,7 @@ * Same code handles filtering of duplicates for PRP as well. */ +#include #include #include #include @@ -35,7 +36,6 @@ static bool seq_nr_after(u16 a, u16 b) #define seq_nr_before(a, b) seq_nr_after((b), (a)) #define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) -#define PRP_DROP_WINDOW_LEN 32768 bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr) { @@ -126,13 +126,29 @@ void hsr_del_self_node(struct hsr_priv *hsr) kfree_rcu(old, rcu_head); } +static void hsr_free_node(struct hsr_node *node) +{ + xa_destroy(&node->seq_blocks); + kfree(node->block_buf); + kfree(node); +} + +static void hsr_free_node_rcu(struct rcu_head *rn) +{ + struct hsr_node *node = container_of(rn, struct hsr_node, rcu_head); + + hsr_free_node(node); +} + void hsr_del_nodes(struct list_head *node_db) { struct hsr_node *node; struct hsr_node *tmp; - list_for_each_entry_safe(node, tmp, node_db, mac_list) - kfree(node); + list_for_each_entry_safe(node, tmp, node_db, mac_list) { + list_del(&node->mac_list); + hsr_free_node(node); + } } void prp_handle_san_frame(bool san, enum hsr_port_type port, @@ -158,7 +174,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, u16 seq_out, bool san, enum hsr_port_type rx_port) { - struct hsr_node *new_node, *node; + struct hsr_node *new_node, *node = NULL; unsigned long now; int i; @@ -169,6 +185,14 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, ether_addr_copy(new_node->macaddress_A, addr); spin_lock_init(&new_node->seq_out_lock); + new_node->block_buf = kcalloc(HSR_MAX_SEQ_BLOCKS, + sizeof(struct hsr_seq_block), + GFP_ATOMIC); + if (!new_node->block_buf) + goto free; + + xa_init(&new_node->seq_blocks); + /* We are only interested in time diffs here, so use current jiffies * as initialization. (0 could trigger an spurious ring error warning). */ @@ -176,11 +200,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, for (i = 0; i < HSR_PT_PORTS; i++) { new_node->time_in[i] = now; new_node->time_out[i] = now; - } - for (i = 0; i < HSR_PT_PORTS; i++) { new_node->seq_out[i] = seq_out; - new_node->seq_expected[i] = seq_out + 1; - new_node->seq_start[i] = seq_out + 1; } if (san && hsr->proto_ops->handle_san_frame) @@ -199,6 +219,8 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, return new_node; out: spin_unlock_bh(&hsr->list_lock); + kfree(new_node->block_buf); +free: kfree(new_node); return node; } @@ -280,6 +302,62 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, san, rx_port); } +static bool hsr_seq_block_is_old(struct hsr_seq_block *block) +{ + unsigned long expiry = msecs_to_jiffies(HSR_ENTRY_FORGET_TIME); + + return time_is_before_jiffies(block->time + expiry); +} + +static void hsr_forget_seq_block(struct hsr_node *node, + struct hsr_seq_block *block) +{ + if (block->time) + xa_erase(&node->seq_blocks, block->block_idx); + block->time = 0; +} + +/* Get the currently active sequence number block. If there is no block yet, or + * the existing one is expired, a new block is created. The idea is to maintain + * a "sparse bitmap" where a bitmap for the whole sequence number space is + * split into blocks and not all blocks exist all the time. The blocks can + * expire after time (in low traffic situations) or when they are replaced in + * the backing fixed size buffer (in high traffic situations). + */ +VISIBLE_IF_KUNIT struct hsr_seq_block *hsr_get_seq_block(struct hsr_node *node, + u16 block_idx) +{ + struct hsr_seq_block *block, *res; + + block = xa_load(&node->seq_blocks, block_idx); + + if (block && hsr_seq_block_is_old(block)) { + hsr_forget_seq_block(node, block); + block = NULL; + } + + if (!block) { + block = &node->block_buf[node->next_block]; + hsr_forget_seq_block(node, block); + + memset(block, 0, sizeof(*block)); + block->time = jiffies; + block->block_idx = block_idx; + + res = xa_store(&node->seq_blocks, block_idx, block, GFP_ATOMIC); + if (xa_is_err(res)) { + block->time = 0; + return NULL; + } + + node->next_block = + (node->next_block + 1) & (HSR_MAX_SEQ_BLOCKS - 1); + } + + return block; +} +EXPORT_SYMBOL_IF_KUNIT(hsr_get_seq_block); + /* Use the Supervision frame's info about an eventual macaddress_B for merging * nodes that has previously had their macaddress_B registered as a separate * node. @@ -288,16 +366,18 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) { struct hsr_node *node_curr = frame->node_src; struct hsr_port *port_rcv = frame->port_rcv; + struct hsr_seq_block *src_blk, *merge_blk; struct hsr_priv *hsr = port_rcv->hsr; - struct hsr_sup_payload *hsr_sp; struct hsr_sup_tlv *hsr_sup_tlv; + struct hsr_sup_payload *hsr_sp; struct hsr_node *node_real; struct sk_buff *skb = NULL; struct list_head *node_db; struct ethhdr *ethhdr; - int i; - unsigned int pull_size = 0; unsigned int total_pull_size = 0; + unsigned int pull_size = 0; + unsigned long idx; + int i; /* Here either frame->skb_hsr or frame->skb_prp should be * valid as supervision frame always will have protocol @@ -391,6 +471,18 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i])) node_real->seq_out[i] = node_curr->seq_out[i]; } + + xa_for_each(&node_curr->seq_blocks, idx, src_blk) { + if (hsr_seq_block_is_old(src_blk)) + continue; + + merge_blk = hsr_get_seq_block(node_real, src_blk->block_idx); + if (!merge_blk) + continue; + merge_blk->time = min(merge_blk->time, src_blk->time); + bitmap_or(merge_blk->seq_nrs, merge_blk->seq_nrs, + src_blk->seq_nrs, HSR_SEQ_BLOCK_SIZE); + } spin_unlock_bh(&node_real->seq_out_lock); node_real->addr_B_port = port_rcv->type; @@ -398,7 +490,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) if (!node_curr->removed) { list_del_rcu(&node_curr->mac_list); node_curr->removed = true; - kfree_rcu(node_curr, rcu_head); + call_rcu(&node_curr->rcu_head, hsr_free_node_rcu); } spin_unlock_bh(&hsr->list_lock); @@ -505,17 +597,12 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) return 0; } -/* Adaptation of the PRP duplicate discard algorithm described in wireshark - * wiki (https://wiki.wireshark.org/PRP) - * - * A drop window is maintained for both LANs with start sequence set to the - * first sequence accepted on the LAN that has not been seen on the other LAN, - * and expected sequence set to the latest received sequence number plus one. - * - * When a frame is received on either LAN it is compared against the received - * frames on the other LAN. If it is outside the drop window of the other LAN - * the frame is accepted and the drop window is updated. - * The drop window for the other LAN is reset. +/* PRP duplicate discard algorithm: we maintain a bitmap where we set a bit for + * every seen sequence number. The bitmap is split into blocks and the block + * management is detailed in hsr_get_seq_block(). In any case, we err on the + * side of accepting a packet, as the specification requires the algorithm to + * be "designed such that it never rejects a legitimate frame, while occasional + * acceptance of a duplicate can be tolerated." (IEC 62439-3:2021, 4.1.10.3). * * 'port' is the outgoing interface * 'frame' is the frame to be sent @@ -526,18 +613,21 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) */ int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) { - enum hsr_port_type other_port; - enum hsr_port_type rcv_port; + u16 sequence_nr, seq_bit, block_idx; + struct hsr_seq_block *block; struct hsr_node *node; - u16 sequence_diff; - u16 sequence_exp; - u16 sequence_nr; - /* out-going frames are always in order - * and can be checked the same way as for HSR - */ - if (frame->port_rcv->type == HSR_PT_MASTER) - return hsr_register_frame_out(port, frame); + node = frame->node_src; + sequence_nr = frame->sequence_nr; + + // out-going frames are always in order + if (frame->port_rcv->type == HSR_PT_MASTER) { + spin_lock_bh(&node->seq_out_lock); + node->time_out[port->type] = jiffies; + node->seq_out[port->type] = sequence_nr; + spin_unlock_bh(&node->seq_out_lock); + return 0; + } /* for PRP we should only forward frames from the slave ports * to the master port @@ -545,52 +635,28 @@ int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) if (port->type != HSR_PT_MASTER) return 1; - node = frame->node_src; - sequence_nr = frame->sequence_nr; - sequence_exp = sequence_nr + 1; - rcv_port = frame->port_rcv->type; - other_port = rcv_port == HSR_PT_SLAVE_A ? HSR_PT_SLAVE_B : - HSR_PT_SLAVE_A; - spin_lock_bh(&node->seq_out_lock); - if (time_is_before_jiffies(node->time_out[port->type] + - msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)) || - (node->seq_start[rcv_port] == node->seq_expected[rcv_port] && - node->seq_start[other_port] == node->seq_expected[other_port])) { - /* the node hasn't been sending for a while - * or both drop windows are empty, forward the frame - */ - node->seq_start[rcv_port] = sequence_nr; - } else if (seq_nr_before(sequence_nr, node->seq_expected[other_port]) && - seq_nr_before_or_eq(node->seq_start[other_port], sequence_nr)) { - /* drop the frame, update the drop window for the other port - * and reset our drop window - */ - node->seq_start[other_port] = sequence_exp; - node->seq_expected[rcv_port] = sequence_exp; - node->seq_start[rcv_port] = node->seq_expected[rcv_port]; - spin_unlock_bh(&node->seq_out_lock); - return 1; - } - /* update the drop window for the port where this frame was received - * and clear the drop window for the other port - */ - node->seq_start[other_port] = node->seq_expected[other_port]; - node->seq_expected[rcv_port] = sequence_exp; - sequence_diff = sequence_exp - node->seq_start[rcv_port]; - if (sequence_diff > PRP_DROP_WINDOW_LEN) - node->seq_start[rcv_port] = sequence_exp - PRP_DROP_WINDOW_LEN; + block_idx = hsr_seq_block_index(sequence_nr); + block = hsr_get_seq_block(node, block_idx); + if (!block) + goto out_new; + + seq_bit = hsr_seq_block_bit(sequence_nr); + if (__test_and_set_bit(seq_bit, block->seq_nrs)) + goto out_seen; node->time_out[port->type] = jiffies; node->seq_out[port->type] = sequence_nr; +out_new: spin_unlock_bh(&node->seq_out_lock); return 0; -} -#if IS_MODULE(CONFIG_PRP_DUP_DISCARD_KUNIT_TEST) -EXPORT_SYMBOL(prp_register_frame_out); -#endif +out_seen: + spin_unlock_bh(&node->seq_out_lock); + return 1; +} +EXPORT_SYMBOL_IF_KUNIT(prp_register_frame_out); static struct hsr_port *get_late_port(struct hsr_priv *hsr, struct hsr_node *node) @@ -672,7 +738,7 @@ void hsr_prune_nodes(struct timer_list *t) list_del_rcu(&node->mac_list); node->removed = true; /* Note that we need to free this entry later: */ - kfree_rcu(node, rcu_head); + call_rcu(&node->rcu_head, hsr_free_node_rcu); } } } @@ -706,7 +772,7 @@ void hsr_prune_proxy_nodes(struct timer_list *t) list_del_rcu(&node->mac_list); node->removed = true; /* Note that we need to free this entry later: */ - kfree_rcu(node, rcu_head); + call_rcu(&node->rcu_head, hsr_free_node_rcu); } } } diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index b04948659d84..686f2a595400 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -74,9 +74,27 @@ bool hsr_is_node_in_db(struct list_head *node_db, int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame); +#if IS_ENABLED(CONFIG_KUNIT) +struct hsr_seq_block *hsr_get_seq_block(struct hsr_node *node, u16 block_idx); +#endif + +#define HSR_SEQ_BLOCK_SHIFT 7 /* 128 bits */ +#define HSR_SEQ_BLOCK_SIZE (1 << HSR_SEQ_BLOCK_SHIFT) +#define HSR_SEQ_BLOCK_MASK (HSR_SEQ_BLOCK_SIZE - 1) +#define HSR_MAX_SEQ_BLOCKS 64 + +#define hsr_seq_block_index(sequence_nr) ((sequence_nr) >> HSR_SEQ_BLOCK_SHIFT) +#define hsr_seq_block_bit(sequence_nr) ((sequence_nr) & HSR_SEQ_BLOCK_MASK) + +struct hsr_seq_block { + unsigned long time; + u16 block_idx; + DECLARE_BITMAP(seq_nrs, HSR_SEQ_BLOCK_SIZE); +}; + struct hsr_node { struct list_head mac_list; - /* Protect R/W access to seq_out */ + /* Protect R/W access to seq_out and seq_blocks */ spinlock_t seq_out_lock; unsigned char macaddress_A[ETH_ALEN]; unsigned char macaddress_B[ETH_ALEN]; @@ -91,8 +109,9 @@ struct hsr_node { u16 seq_out[HSR_PT_PORTS]; bool removed; /* PRP specific duplicate handling */ - u16 seq_expected[HSR_PT_PORTS]; - u16 seq_start[HSR_PT_PORTS]; + struct xarray seq_blocks; + struct hsr_seq_block *block_buf; + unsigned int next_block; struct rcu_head rcu_head; }; diff --git a/net/hsr/prp_dup_discard_test.c b/net/hsr/prp_dup_discard_test.c index e86b7b633ae8..bfa3d318ec04 100644 --- a/net/hsr/prp_dup_discard_test.c +++ b/net/hsr/prp_dup_discard_test.c @@ -4,6 +4,8 @@ #include "hsr_main.h" #include "hsr_framereg.h" +MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); + struct prp_test_data { struct hsr_port port; struct hsr_port port_rcv; @@ -17,13 +19,17 @@ static struct prp_test_data *build_prp_test_data(struct kunit *test) sizeof(struct prp_test_data), GFP_USER); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, data); + data->node.block_buf = kunit_kcalloc(test, HSR_MAX_SEQ_BLOCKS, + sizeof(struct hsr_seq_block), + GFP_ATOMIC); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, data->node.block_buf); + + xa_init(&data->node.seq_blocks); + spin_lock_init(&data->node.seq_out_lock); + data->frame.node_src = &data->node; data->frame.port_rcv = &data->port_rcv; data->port_rcv.type = HSR_PT_SLAVE_A; - data->node.seq_start[HSR_PT_SLAVE_A] = 1; - data->node.seq_expected[HSR_PT_SLAVE_A] = 1; - data->node.seq_start[HSR_PT_SLAVE_B] = 1; - data->node.seq_expected[HSR_PT_SLAVE_B] = 1; data->node.seq_out[HSR_PT_MASTER] = 0; data->node.time_out[HSR_PT_MASTER] = jiffies; data->port.type = HSR_PT_MASTER; @@ -31,19 +37,32 @@ static struct prp_test_data *build_prp_test_data(struct kunit *test) return data; } -static void check_prp_counters(struct kunit *test, - struct prp_test_data *data, - u16 seq_start_a, u16 seq_expected_a, - u16 seq_start_b, u16 seq_expected_b) +static void check_prp_frame_seen(struct kunit *test, struct prp_test_data *data, + u16 sequence_nr) +{ + u16 block_idx, seq_bit; + struct hsr_seq_block *block; + + block_idx = sequence_nr >> HSR_SEQ_BLOCK_SHIFT; + block = xa_load(&data->node.seq_blocks, block_idx); + KUNIT_EXPECT_NOT_NULL(test, block); + + seq_bit = sequence_nr & HSR_SEQ_BLOCK_MASK; + KUNIT_EXPECT_TRUE(test, test_bit(seq_bit, block->seq_nrs)); +} + +static void check_prp_frame_unseen(struct kunit *test, + struct prp_test_data *data, u16 sequence_nr) { - KUNIT_EXPECT_EQ(test, data->node.seq_start[HSR_PT_SLAVE_A], - seq_start_a); - KUNIT_EXPECT_EQ(test, data->node.seq_start[HSR_PT_SLAVE_B], - seq_start_b); - KUNIT_EXPECT_EQ(test, data->node.seq_expected[HSR_PT_SLAVE_A], - seq_expected_a); - KUNIT_EXPECT_EQ(test, data->node.seq_expected[HSR_PT_SLAVE_B], - seq_expected_b); + u16 block_idx, seq_bit; + struct hsr_seq_block *block; + + block_idx = sequence_nr >> HSR_SEQ_BLOCK_SHIFT; + block = hsr_get_seq_block(&data->node, block_idx); + KUNIT_EXPECT_NOT_NULL(test, block); + + seq_bit = sequence_nr & HSR_SEQ_BLOCK_MASK; + KUNIT_EXPECT_FALSE(test, test_bit(seq_bit, block->seq_nrs)); } static void prp_dup_discard_forward(struct kunit *test) @@ -57,49 +76,58 @@ static void prp_dup_discard_forward(struct kunit *test) KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, data->node.seq_out[HSR_PT_MASTER]); KUNIT_EXPECT_EQ(test, jiffies, data->node.time_out[HSR_PT_MASTER]); - check_prp_counters(test, data, data->frame.sequence_nr, - data->frame.sequence_nr + 1, 1, 1); + check_prp_frame_seen(test, data, data->frame.sequence_nr); } -static void prp_dup_discard_inside_dropwindow(struct kunit *test) +static void prp_dup_discard_drop_duplicate(struct kunit *test) { - /* Normal situation, other LAN ahead by one. Frame is dropped */ struct prp_test_data *data = build_prp_test_data(test); unsigned long time = jiffies - 10; - data->frame.sequence_nr = 1; - data->node.seq_expected[HSR_PT_SLAVE_B] = 3; - data->node.seq_out[HSR_PT_MASTER] = 2; + data->frame.sequence_nr = 2; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_frame_seen(test, data, data->frame.sequence_nr); data->node.time_out[HSR_PT_MASTER] = time; KUNIT_EXPECT_EQ(test, 1, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, 2, data->node.seq_out[HSR_PT_MASTER]); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); KUNIT_EXPECT_EQ(test, time, data->node.time_out[HSR_PT_MASTER]); - check_prp_counters(test, data, 2, 2, 2, 3); + check_prp_frame_seen(test, data, data->frame.sequence_nr); } -static void prp_dup_discard_node_timeout(struct kunit *test) +static void prp_dup_discard_entry_timeout(struct kunit *test) { /* Timeout situation, node hasn't sent anything for a while */ struct prp_test_data *data = build_prp_test_data(test); + struct hsr_seq_block *block; + u16 block_idx; data->frame.sequence_nr = 7; - data->node.seq_start[HSR_PT_SLAVE_A] = 1234; - data->node.seq_expected[HSR_PT_SLAVE_A] = 1235; - data->node.seq_start[HSR_PT_SLAVE_B] = 1234; - data->node.seq_expected[HSR_PT_SLAVE_B] = 1234; - data->node.seq_out[HSR_PT_MASTER] = 1234; - data->node.time_out[HSR_PT_MASTER] = - jiffies - msecs_to_jiffies(HSR_ENTRY_FORGET_TIME) - 1; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_frame_seen(test, data, data->frame.sequence_nr); + + data->frame.sequence_nr = 11; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + check_prp_frame_seen(test, data, data->frame.sequence_nr); + + block_idx = data->frame.sequence_nr >> HSR_SEQ_BLOCK_SHIFT; + block = hsr_get_seq_block(&data->node, block_idx); + block->time = jiffies - msecs_to_jiffies(HSR_ENTRY_FORGET_TIME) - 1; KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, data->node.seq_out[HSR_PT_MASTER]); KUNIT_EXPECT_EQ(test, jiffies, data->node.time_out[HSR_PT_MASTER]); - check_prp_counters(test, data, data->frame.sequence_nr, - data->frame.sequence_nr + 1, 1234, 1234); + check_prp_frame_seen(test, data, data->frame.sequence_nr); + check_prp_frame_unseen(test, data, 7); } static void prp_dup_discard_out_of_sequence(struct kunit *test) @@ -107,11 +135,13 @@ static void prp_dup_discard_out_of_sequence(struct kunit *test) /* One frame is received out of sequence on both LANs */ struct prp_test_data *data = build_prp_test_data(test); - data->node.seq_start[HSR_PT_SLAVE_A] = 10; - data->node.seq_expected[HSR_PT_SLAVE_A] = 10; - data->node.seq_start[HSR_PT_SLAVE_B] = 10; - data->node.seq_expected[HSR_PT_SLAVE_B] = 10; - data->node.seq_out[HSR_PT_MASTER] = 9; + /* initial frame, should be accepted */ + data->frame.sequence_nr = 9; + KUNIT_EXPECT_EQ(test, 0, + prp_register_frame_out(&data->port, &data->frame)); + KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, + data->node.seq_out[HSR_PT_MASTER]); + check_prp_frame_seen(test, data, data->frame.sequence_nr); /* 1st old frame, should be accepted */ data->frame.sequence_nr = 8; @@ -119,18 +149,13 @@ static void prp_dup_discard_out_of_sequence(struct kunit *test) prp_register_frame_out(&data->port, &data->frame)); KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, data->node.seq_out[HSR_PT_MASTER]); - check_prp_counters(test, data, data->frame.sequence_nr, - data->frame.sequence_nr + 1, 10, 10); + check_prp_frame_seen(test, data, data->frame.sequence_nr); /* 2nd frame should be dropped */ data->frame.sequence_nr = 8; data->port_rcv.type = HSR_PT_SLAVE_B; KUNIT_EXPECT_EQ(test, 1, prp_register_frame_out(&data->port, &data->frame)); - check_prp_counters(test, data, data->frame.sequence_nr + 1, - data->frame.sequence_nr + 1, - data->frame.sequence_nr + 1, - data->frame.sequence_nr + 1); /* Next frame, this is forwarded */ data->frame.sequence_nr = 10; @@ -139,18 +164,13 @@ static void prp_dup_discard_out_of_sequence(struct kunit *test) prp_register_frame_out(&data->port, &data->frame)); KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, data->node.seq_out[HSR_PT_MASTER]); - check_prp_counters(test, data, data->frame.sequence_nr, - data->frame.sequence_nr + 1, 9, 9); + check_prp_frame_seen(test, data, data->frame.sequence_nr); /* and next one is dropped */ data->frame.sequence_nr = 10; data->port_rcv.type = HSR_PT_SLAVE_B; KUNIT_EXPECT_EQ(test, 1, prp_register_frame_out(&data->port, &data->frame)); - check_prp_counters(test, data, data->frame.sequence_nr + 1, - data->frame.sequence_nr + 1, - data->frame.sequence_nr + 1, - data->frame.sequence_nr + 1); } static void prp_dup_discard_lan_b_late(struct kunit *test) @@ -158,10 +178,6 @@ static void prp_dup_discard_lan_b_late(struct kunit *test) /* LAN B is behind */ struct prp_test_data *data = build_prp_test_data(test); - data->node.seq_start[HSR_PT_SLAVE_A] = 9; - data->node.seq_expected[HSR_PT_SLAVE_A] = 9; - data->node.seq_start[HSR_PT_SLAVE_B] = 9; - data->node.seq_expected[HSR_PT_SLAVE_B] = 9; data->node.seq_out[HSR_PT_MASTER] = 8; data->frame.sequence_nr = 9; @@ -169,32 +185,30 @@ static void prp_dup_discard_lan_b_late(struct kunit *test) prp_register_frame_out(&data->port, &data->frame)); KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, data->node.seq_out[HSR_PT_MASTER]); - check_prp_counters(test, data, 9, 10, 9, 9); + check_prp_frame_seen(test, data, data->frame.sequence_nr); data->frame.sequence_nr = 10; KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, data->node.seq_out[HSR_PT_MASTER]); - check_prp_counters(test, data, 9, 11, 9, 9); + check_prp_frame_seen(test, data, data->frame.sequence_nr); data->frame.sequence_nr = 9; data->port_rcv.type = HSR_PT_SLAVE_B; KUNIT_EXPECT_EQ(test, 1, prp_register_frame_out(&data->port, &data->frame)); - check_prp_counters(test, data, 10, 11, 10, 10); data->frame.sequence_nr = 10; data->port_rcv.type = HSR_PT_SLAVE_B; KUNIT_EXPECT_EQ(test, 1, prp_register_frame_out(&data->port, &data->frame)); - check_prp_counters(test, data, 11, 11, 11, 11); } static struct kunit_case prp_dup_discard_test_cases[] = { KUNIT_CASE(prp_dup_discard_forward), - KUNIT_CASE(prp_dup_discard_inside_dropwindow), - KUNIT_CASE(prp_dup_discard_node_timeout), + KUNIT_CASE(prp_dup_discard_drop_duplicate), + KUNIT_CASE(prp_dup_discard_entry_timeout), KUNIT_CASE(prp_dup_discard_out_of_sequence), KUNIT_CASE(prp_dup_discard_lan_b_late), {} -- 2.52.0 Add tests where one link has different rates of packet loss or reorders packets. PRP should still be able to recover from these link faults and show no packet loss. However, it is acceptable to receive some level of duplicate packets. This matches the current specification (IEC 62439-3:2021) of the duplicate discard algorithm that requires it to be "designed such that it never rejects a legitimate frame, while occasional acceptance of a duplicate can be tolerated." The rate of acceptable duplicates in this test is intentionally high (10%) to make the test stable, the values I observed in the worst test cases (20% loss) are around 5% duplicates. The duplicates occur because of the 10ms ping interval in the test. As blocks expire after 400ms based on the timestamp of the first received sequence number in the block, every approx. 40th will lead to a new, clean block being used where the sequence number hasn't been seen before. As this occurs on both nodes in the test (for requests and replies), we observe around 20 duplicate frames. Signed-off-by: Felix Maurer Reviewed-by: Sebastian Andrzej Siewior --- .../testing/selftests/net/hsr/link_faults.sh | 79 +++++++++++++++++-- 1 file changed, 74 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/hsr/link_faults.sh b/tools/testing/selftests/net/hsr/link_faults.sh index 7ff14dcd32e7..1959bea17147 100755 --- a/tools/testing/selftests/net/hsr/link_faults.sh +++ b/tools/testing/selftests/net/hsr/link_faults.sh @@ -11,10 +11,16 @@ ALL_TESTS=" test_cut_link_hsrv1 test_clean_prp test_cut_link_prp + test_packet_loss_prp + test_high_packet_loss_prp + test_reordering_prp " -# The tests are running ping for 5sec with a relatively short interval with a -# cut link, which should be recoverable by HSR/PRP. +# The tests are running ping for 5sec with a relatively short interval in +# different scenarios with faulty links (cut links, packet loss, delay, +# reordering) that should be recoverable by HSR/PRP. The ping interval (10ms) +# is short enough that the base delay (50ms) leads to a queue in the netem +# qdiscs which is needed for reordering. setup_hsr_topo() { @@ -160,6 +166,7 @@ check_ping() { local node="$1" local dst="$2" + local accepted_dups="$3" local ping_args="-q -i 0.01 -c 400" log_info "Running ping $node -> $dst" @@ -178,7 +185,9 @@ check_ping() loss="${BASH_REMATCH[1]}" fi - check_err "$dups" "Unexpected duplicate packets (${dups})" + if [ "$dups" -gt "$accepted_dups" ]; then + check_err 1 "Unexpected duplicate packets (${dups})" + fi if [ "$loss" != "0%" ]; then check_err 1 "Unexpected packet loss (${loss})" fi @@ -197,7 +206,7 @@ test_clean() return fi - check_ping "$node1" "100.64.0.2" + check_ping "$node1" "100.64.0.2" 0 log_test "${tname}" } @@ -237,7 +246,7 @@ test_cut_link() log_info "Cutting link" ip -net "$node1" link set vethB down ) & - check_ping "$node1" "100.64.0.2" + check_ping "$node1" "100.64.0.2" 0 wait log_test "${tname}" @@ -259,6 +268,66 @@ test_cut_link_prp() test_cut_link "PRP" } +test_packet_loss() +{ + local proto="$1" + local loss="$2" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}, ${loss}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + # Packet loss with lower delay makes sure the packets on the lossy link + # arrive first. + tc -net "$node1" qdisc add dev vethA root netem delay 50ms + tc -net "$node1" qdisc add dev vethB root netem delay 20ms loss "$loss" + + check_ping "$node1" "100.64.0.2" 40 + + log_test "${tname}" +} + +test_packet_loss_prp() +{ + test_packet_loss "PRP" "20%" +} + +test_high_packet_loss_prp() +{ + test_packet_loss "PRP" "80%" +} + +test_reordering() +{ + local proto="$1" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + tc -net "$node1" qdisc add dev vethA root netem delay 50ms + tc -net "$node1" qdisc add dev vethB root netem delay 50ms reorder 20% + + check_ping "$node1" "100.64.0.2" 40 + + log_test "${tname}" +} + +test_reordering_prp() +{ + test_reordering "PRP" +} + cleanup() { cleanup_all_ns -- 2.52.0 The HSR duplicate discard algorithm had even more basic problems than the described for PRP in the previous patch. It relied only on the last received sequence number to decide if a new frame should be forwarded to any port. This does not work correctly in any case where frames are received out of order. The linked bug report claims that this can even happen with perfectly fine links due to the order in which incoming frames are processed (which can be unexpected on multi-core systems). The issue also occasionally shows up in the HSR selftests. The main reason is that the sequence number that was last forwarded to the master port may have skipped a number which will in turn never be delivered to the host. As the problem (we accidentally skip over a sequence number that has not been received but will be received in the future) is similar to PRP, we can apply a similar solution. The duplicate discard algorithm based on the "sparse bitmap" works well for HSR if it is extended to track one bitmap for each port (A, B, master, interlink). To do this, change the sequence number blocks to contain a flexible array member as the last member that can keep chunks for as many bitmaps as we need. This design makes it easy to reuse the same algorithm in a potential PRP RedBox implementation. The duplicate discard algorithm functions are modified to deal with sequence number blocks of different sizes and to correctly use the array of bitmap chunks. There is a notable speciality for HSR: the port type has a special port type NONE with value 0. This leads to the number of port types being 5 instead of actually 4. To save memory, remove the NONE port from the bitmap (by subtracting 1) when setting up the block buffer and when accessing the bitmap chunks in the array. Removing the old algorithm allows us to get rid of a few fields that are not needed any more: time_out and seq_out for each port. We can also remove some functions that were only necessary for the previous duplicate discard algorithm. The removal of seq_out is possible despite its previous usage in hsr_register_frame_in: it was used to prevent updates to time_in when "invalid" sequence numbers were received. With the new duplicate discard algorithm, time_in has no relevance for the expiry of sequence numbers anymore. They will expire based on the timestamps in the sequence number blocks after at most 400ms. There is no need that a node "re-registers" to "resume communication": after 400ms, all sequence numbers are accepted again. Also, according to the IEC 62439-3:2021, all nodes are supposed to send no traffic for 500ms after boot to lead exactly to this expiry of seen sequence numbers. time_in is still used for pruning nodes from the node table after no traffic has been received for 60sec. Pruning is only needed if the node is really gone and has not been sending any traffic for that period. seq_out was also used to report the last incoming sequence number from a node through netlink. I am not sure how useful this value is to userspace at all, but added getting it from the sequence number blocks. This number can be outdated after node merging until a new block has been added. Update the KUnit test for the PRP duplicate discard so that the node allocation matches and expectations on the removed fields are removed. Reported-by: Yoann Congal Closes: https://lore.kernel.org/netdev/7d221a07-8358-4c0b-a09c-3b029c052245@smile.fr/ Signed-off-by: Felix Maurer --- net/hsr/hsr_framereg.c | 216 +++++++++++++++++---------------- net/hsr/hsr_framereg.h | 20 ++- net/hsr/prp_dup_discard_test.c | 38 ++---- 3 files changed, 131 insertions(+), 143 deletions(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index d1052c23575e..e621b61958a7 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -20,23 +20,6 @@ #include "hsr_framereg.h" #include "hsr_netlink.h" -/* seq_nr_after(a, b) - return true if a is after (higher in sequence than) b, - * false otherwise. - */ -static bool seq_nr_after(u16 a, u16 b) -{ - /* Remove inconsistency where - * seq_nr_after(a, b) == seq_nr_before(a, b) - */ - if ((int)b - a == 32768) - return false; - - return (((s16)(b - a)) < 0); -} - -#define seq_nr_before(a, b) seq_nr_after((b), (a)) -#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b))) - bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr) { if (!hsr->redbox || !is_valid_ether_addr(hsr->macaddress_redbox)) @@ -164,18 +147,16 @@ void prp_handle_san_frame(bool san, enum hsr_port_type port, node->san_b = true; } -/* Allocate an hsr_node and add it to node_db. 'addr' is the node's address_A; - * seq_out is used to initialize filtering of outgoing duplicate frames - * originating from the newly added node. +/* Allocate an hsr_node and add it to node_db. 'addr' is the node's address_A. */ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, struct list_head *node_db, - unsigned char addr[], - u16 seq_out, bool san, + unsigned char addr[], bool san, enum hsr_port_type rx_port) { struct hsr_node *new_node, *node = NULL; unsigned long now; + size_t block_sz; int i; new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC); @@ -185,9 +166,13 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, ether_addr_copy(new_node->macaddress_A, addr); spin_lock_init(&new_node->seq_out_lock); - new_node->block_buf = kcalloc(HSR_MAX_SEQ_BLOCKS, - sizeof(struct hsr_seq_block), - GFP_ATOMIC); + if (hsr->prot_version == PRP_V1) + new_node->seq_port_cnt = 1; + else + new_node->seq_port_cnt = HSR_PT_PORTS - 1; + + block_sz = hsr_seq_block_size(new_node); + new_node->block_buf = kcalloc(HSR_MAX_SEQ_BLOCKS, block_sz, GFP_ATOMIC); if (!new_node->block_buf) goto free; @@ -199,8 +184,6 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, now = jiffies; for (i = 0; i < HSR_PT_PORTS; i++) { new_node->time_in[i] = now; - new_node->time_out[i] = now; - new_node->seq_out[i] = seq_out; } if (san && hsr->proto_ops->handle_san_frame) @@ -245,7 +228,6 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, struct ethhdr *ethhdr; struct prp_rct *rct; bool san = false; - u16 seq_out; if (!skb_mac_header_was_set(skb)) return NULL; @@ -282,24 +264,13 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db, /* Check if skb contains hsr_ethhdr */ if (skb->mac_len < sizeof(struct hsr_ethhdr)) return NULL; - - /* Use the existing sequence_nr from the tag as starting point - * for filtering duplicate frames. - */ - seq_out = hsr_get_skb_sequence_nr(skb) - 1; } else { rct = skb_get_PRP_rct(skb); - if (rct && prp_check_lsdu_size(skb, rct, is_sup)) { - seq_out = prp_get_skb_sequence_nr(rct); - } else { - if (rx_port != HSR_PT_MASTER) - san = true; - seq_out = HSR_SEQNR_START; - } + if (!rct && rx_port != HSR_PT_MASTER) + san = true; } - return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out, - san, rx_port); + return hsr_add_node(hsr, node_db, ethhdr->h_source, san, rx_port); } static bool hsr_seq_block_is_old(struct hsr_seq_block *block) @@ -328,6 +299,7 @@ VISIBLE_IF_KUNIT struct hsr_seq_block *hsr_get_seq_block(struct hsr_node *node, u16 block_idx) { struct hsr_seq_block *block, *res; + size_t block_sz; block = xa_load(&node->seq_blocks, block_idx); @@ -337,10 +309,11 @@ VISIBLE_IF_KUNIT struct hsr_seq_block *hsr_get_seq_block(struct hsr_node *node, } if (!block) { - block = &node->block_buf[node->next_block]; + block_sz = hsr_seq_block_size(node); + block = node->block_buf + node->next_block * block_sz; hsr_forget_seq_block(node, block); - memset(block, 0, sizeof(*block)); + memset(block, 0, block_sz); block->time = jiffies; block->block_idx = block_idx; @@ -420,8 +393,7 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) if (!node_real) /* No frame received from AddrA of this node yet */ node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A, - HSR_SEQNR_START - 1, true, - port_rcv->type); + true, port_rcv->type); if (!node_real) goto done; /* No mem */ if (node_real == node_curr) @@ -468,8 +440,6 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) node_real->time_in_stale[i] = node_curr->time_in_stale[i]; } - if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i])) - node_real->seq_out[i] = node_curr->seq_out[i]; } xa_for_each(&node_curr->seq_blocks, idx, src_blk) { @@ -480,8 +450,10 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) if (!merge_blk) continue; merge_blk->time = min(merge_blk->time, src_blk->time); - bitmap_or(merge_blk->seq_nrs, merge_blk->seq_nrs, - src_blk->seq_nrs, HSR_SEQ_BLOCK_SIZE); + for (i = 0; i < node_real->seq_port_cnt; i++) { + bitmap_or(merge_blk->seq_nrs[i], merge_blk->seq_nrs[i], + src_blk->seq_nrs[i], HSR_SEQ_BLOCK_SIZE); + } } spin_unlock_bh(&node_real->seq_out_lock); node_real->addr_B_port = port_rcv->type; @@ -558,60 +530,28 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, u16 sequence_nr) { - /* Don't register incoming frames without a valid sequence number. This - * ensures entries of restarted nodes gets pruned so that they can - * re-register and resume communications. - */ - if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) && - seq_nr_before(sequence_nr, node->seq_out[port->type])) - return; - node->time_in[port->type] = jiffies; node->time_in_stale[port->type] = false; } -/* 'skb' is a HSR Ethernet frame (with a HSR tag inserted), with a valid - * ethhdr->h_source address and skb->mac_header set. - * - * Return: - * 1 if frame can be shown to have been sent recently on this interface, - * 0 otherwise, or - * negative error code on error - */ -int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) -{ - struct hsr_node *node = frame->node_src; - u16 sequence_nr = frame->sequence_nr; - - spin_lock_bh(&node->seq_out_lock); - if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) && - time_is_after_jiffies(node->time_out[port->type] + - msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) { - spin_unlock_bh(&node->seq_out_lock); - return 1; - } - - node->time_out[port->type] = jiffies; - node->seq_out[port->type] = sequence_nr; - spin_unlock_bh(&node->seq_out_lock); - return 0; -} - -/* PRP duplicate discard algorithm: we maintain a bitmap where we set a bit for +/* Duplicate discard algorithm: we maintain a bitmap where we set a bit for * every seen sequence number. The bitmap is split into blocks and the block * management is detailed in hsr_get_seq_block(). In any case, we err on the * side of accepting a packet, as the specification requires the algorithm to * be "designed such that it never rejects a legitimate frame, while occasional * acceptance of a duplicate can be tolerated." (IEC 62439-3:2021, 4.1.10.3). + * While this requirement is explicit for PRP, applying it to HSR does no harm + * either. * - * 'port' is the outgoing interface * 'frame' is the frame to be sent + * 'port_type' is the type of the outgoing interface * * Return: * 1 if frame can be shown to have been sent recently on this interface, * 0 otherwise */ -int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) +static int hsr_check_duplicate(struct hsr_frame_info *frame, + unsigned int port_type) { u16 sequence_nr, seq_bit, block_idx; struct hsr_seq_block *block; @@ -620,20 +560,8 @@ int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) node = frame->node_src; sequence_nr = frame->sequence_nr; - // out-going frames are always in order - if (frame->port_rcv->type == HSR_PT_MASTER) { - spin_lock_bh(&node->seq_out_lock); - node->time_out[port->type] = jiffies; - node->seq_out[port->type] = sequence_nr; - spin_unlock_bh(&node->seq_out_lock); + if (WARN_ON_ONCE(port_type >= node->seq_port_cnt)) return 0; - } - - /* for PRP we should only forward frames from the slave ports - * to the master port - */ - if (port->type != HSR_PT_MASTER) - return 1; spin_lock_bh(&node->seq_out_lock); @@ -643,11 +571,9 @@ int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) goto out_new; seq_bit = hsr_seq_block_bit(sequence_nr); - if (__test_and_set_bit(seq_bit, block->seq_nrs)) + if (__test_and_set_bit(seq_bit, block->seq_nrs[port_type])) goto out_seen; - node->time_out[port->type] = jiffies; - node->seq_out[port->type] = sequence_nr; out_new: spin_unlock_bh(&node->seq_out_lock); return 0; @@ -656,6 +582,49 @@ int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) spin_unlock_bh(&node->seq_out_lock); return 1; } + +/* HSR duplicate discard: we check if the same frame has already been sent on + * this outgoing interface. The check follows the general duplicate discard + * algorithm. + * + * 'port' is the outgoing interface + * 'frame' is the frame to be sent + * + * Return: + * 1 if frame can be shown to have been sent recently on this interface, + * 0 otherwise + */ +int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) +{ + return hsr_check_duplicate(frame, port->type - 1); +} + +/* PRP duplicate discard: we only consider frames that are received on port A + * or port B and should go to the master port. For those, we check if they have + * already been received by the host, i.e., master port. The check uses the + * general duplicate discard algorithm, but without tracking multiple ports. + * + * 'port' is the outgoing interface + * 'frame' is the frame to be sent + * + * Return: + * 1 if frame can be shown to have been sent recently on this interface, + * 0 otherwise + */ +int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame) +{ + // out-going frames are always in order + if (frame->port_rcv->type == HSR_PT_MASTER) + return 0; + + /* for PRP we should only forward frames from the slave ports + * to the master port + */ + if (port->type != HSR_PT_MASTER) + return 1; + + return hsr_check_duplicate(frame, 0); +} EXPORT_SYMBOL_IF_KUNIT(prp_register_frame_out); static struct hsr_port *get_late_port(struct hsr_priv *hsr, @@ -806,6 +775,39 @@ void *hsr_get_next_node(struct hsr_priv *hsr, void *_pos, return NULL; } +/* Fill the last sequence number that has been received from node on if1 by + * finding the last sequence number sent on port B; accordingly get the last + * received sequence number for if2 using sent sequence numbers on port A. + */ +static void fill_last_seq_nrs(struct hsr_node *node, u16 *if1_seq, u16 *if2_seq) +{ + struct hsr_seq_block *block; + unsigned int block_off; + size_t block_sz; + u16 seq_bit; + + spin_lock_bh(&node->seq_out_lock); + + // Get last inserted block + block_off = (node->next_block - 1) & (HSR_MAX_SEQ_BLOCKS - 1); + block_sz = hsr_seq_block_size(node); + block = node->block_buf + block_off * block_sz; + + if (!bitmap_empty(block->seq_nrs[HSR_PT_SLAVE_B - 1], + HSR_SEQ_BLOCK_SIZE)) { + seq_bit = find_last_bit(block->seq_nrs[HSR_PT_SLAVE_B - 1], + HSR_SEQ_BLOCK_SIZE); + *if1_seq = (block->block_idx << HSR_SEQ_BLOCK_SHIFT) | seq_bit; + } + if (!bitmap_empty(block->seq_nrs[HSR_PT_SLAVE_A - 1], + HSR_SEQ_BLOCK_SIZE)) { + seq_bit = find_last_bit(block->seq_nrs[HSR_PT_SLAVE_A - 1], + HSR_SEQ_BLOCK_SIZE); + *if2_seq = (block->block_idx << HSR_SEQ_BLOCK_SHIFT) | seq_bit; + } + spin_unlock_bh(&node->seq_out_lock); +} + int hsr_get_node_data(struct hsr_priv *hsr, const unsigned char *addr, unsigned char addr_b[ETH_ALEN], @@ -846,8 +848,10 @@ int hsr_get_node_data(struct hsr_priv *hsr, *if2_age = jiffies_to_msecs(tdiff); /* Present sequence numbers as if they were incoming on interface */ - *if1_seq = node->seq_out[HSR_PT_SLAVE_B]; - *if2_seq = node->seq_out[HSR_PT_SLAVE_A]; + *if1_seq = 0; + *if2_seq = 0; + if (hsr->prot_version != PRP_V1) + fill_last_seq_nrs(node, if1_seq, if2_seq); if (node->addr_B_port != HSR_PT_NONE) { port = hsr_port_get_hsr(hsr, node->addr_B_port); diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 686f2a595400..c65ecb925734 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -89,12 +89,15 @@ struct hsr_seq_block *hsr_get_seq_block(struct hsr_node *node, u16 block_idx); struct hsr_seq_block { unsigned long time; u16 block_idx; - DECLARE_BITMAP(seq_nrs, HSR_SEQ_BLOCK_SIZE); + /* Should be a flexible array member of what DECLARE_BITMAP() would + * produce. + */ + unsigned long seq_nrs[][BITS_TO_LONGS(HSR_SEQ_BLOCK_SIZE)]; }; struct hsr_node { struct list_head mac_list; - /* Protect R/W access to seq_out and seq_blocks */ + /* Protect R/W access seq_blocks */ spinlock_t seq_out_lock; unsigned char macaddress_A[ETH_ALEN]; unsigned char macaddress_B[ETH_ALEN]; @@ -102,17 +105,22 @@ struct hsr_node { enum hsr_port_type addr_B_port; unsigned long time_in[HSR_PT_PORTS]; bool time_in_stale[HSR_PT_PORTS]; - unsigned long time_out[HSR_PT_PORTS]; /* if the node is a SAN */ bool san_a; bool san_b; - u16 seq_out[HSR_PT_PORTS]; bool removed; - /* PRP specific duplicate handling */ + /* Duplicate detection */ struct xarray seq_blocks; - struct hsr_seq_block *block_buf; + void *block_buf; unsigned int next_block; + unsigned int seq_port_cnt; struct rcu_head rcu_head; }; +static inline size_t hsr_seq_block_size(struct hsr_node *node) +{ + WARN_ON_ONCE(node->seq_port_cnt == 0); + return struct_size_t(struct hsr_seq_block, seq_nrs, node->seq_port_cnt); +} + #endif /* __HSR_FRAMEREG_H */ diff --git a/net/hsr/prp_dup_discard_test.c b/net/hsr/prp_dup_discard_test.c index bfa3d318ec04..b028e71e6a0f 100644 --- a/net/hsr/prp_dup_discard_test.c +++ b/net/hsr/prp_dup_discard_test.c @@ -15,12 +15,15 @@ struct prp_test_data { static struct prp_test_data *build_prp_test_data(struct kunit *test) { + size_t block_sz; + struct prp_test_data *data = kunit_kzalloc(test, sizeof(struct prp_test_data), GFP_USER); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, data); - data->node.block_buf = kunit_kcalloc(test, HSR_MAX_SEQ_BLOCKS, - sizeof(struct hsr_seq_block), + data->node.seq_port_cnt = 1; + block_sz = hsr_seq_block_size(&data->node); + data->node.block_buf = kunit_kcalloc(test, HSR_MAX_SEQ_BLOCKS, block_sz, GFP_ATOMIC); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, data->node.block_buf); @@ -30,8 +33,6 @@ static struct prp_test_data *build_prp_test_data(struct kunit *test) data->frame.node_src = &data->node; data->frame.port_rcv = &data->port_rcv; data->port_rcv.type = HSR_PT_SLAVE_A; - data->node.seq_out[HSR_PT_MASTER] = 0; - data->node.time_out[HSR_PT_MASTER] = jiffies; data->port.type = HSR_PT_MASTER; return data; @@ -48,7 +49,7 @@ static void check_prp_frame_seen(struct kunit *test, struct prp_test_data *data, KUNIT_EXPECT_NOT_NULL(test, block); seq_bit = sequence_nr & HSR_SEQ_BLOCK_MASK; - KUNIT_EXPECT_TRUE(test, test_bit(seq_bit, block->seq_nrs)); + KUNIT_EXPECT_TRUE(test, test_bit(seq_bit, block->seq_nrs[0])); } static void check_prp_frame_unseen(struct kunit *test, @@ -62,7 +63,7 @@ static void check_prp_frame_unseen(struct kunit *test, KUNIT_EXPECT_NOT_NULL(test, block); seq_bit = sequence_nr & HSR_SEQ_BLOCK_MASK; - KUNIT_EXPECT_FALSE(test, test_bit(seq_bit, block->seq_nrs)); + KUNIT_EXPECT_FALSE(test, test_bit(seq_bit, block->seq_nrs[0])); } static void prp_dup_discard_forward(struct kunit *test) @@ -73,30 +74,20 @@ static void prp_dup_discard_forward(struct kunit *test) data->frame.sequence_nr = 2; KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, - data->node.seq_out[HSR_PT_MASTER]); - KUNIT_EXPECT_EQ(test, jiffies, data->node.time_out[HSR_PT_MASTER]); check_prp_frame_seen(test, data, data->frame.sequence_nr); } static void prp_dup_discard_drop_duplicate(struct kunit *test) { struct prp_test_data *data = build_prp_test_data(test); - unsigned long time = jiffies - 10; data->frame.sequence_nr = 2; KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, - data->node.seq_out[HSR_PT_MASTER]); check_prp_frame_seen(test, data, data->frame.sequence_nr); - data->node.time_out[HSR_PT_MASTER] = time; KUNIT_EXPECT_EQ(test, 1, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, - data->node.seq_out[HSR_PT_MASTER]); - KUNIT_EXPECT_EQ(test, time, data->node.time_out[HSR_PT_MASTER]); check_prp_frame_seen(test, data, data->frame.sequence_nr); } @@ -123,9 +114,6 @@ static void prp_dup_discard_entry_timeout(struct kunit *test) KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, - data->node.seq_out[HSR_PT_MASTER]); - KUNIT_EXPECT_EQ(test, jiffies, data->node.time_out[HSR_PT_MASTER]); check_prp_frame_seen(test, data, data->frame.sequence_nr); check_prp_frame_unseen(test, data, 7); } @@ -139,16 +127,12 @@ static void prp_dup_discard_out_of_sequence(struct kunit *test) data->frame.sequence_nr = 9; KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, - data->node.seq_out[HSR_PT_MASTER]); check_prp_frame_seen(test, data, data->frame.sequence_nr); /* 1st old frame, should be accepted */ data->frame.sequence_nr = 8; KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, - data->node.seq_out[HSR_PT_MASTER]); check_prp_frame_seen(test, data, data->frame.sequence_nr); /* 2nd frame should be dropped */ @@ -162,8 +146,6 @@ static void prp_dup_discard_out_of_sequence(struct kunit *test) data->port_rcv.type = HSR_PT_SLAVE_A; KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, - data->node.seq_out[HSR_PT_MASTER]); check_prp_frame_seen(test, data, data->frame.sequence_nr); /* and next one is dropped */ @@ -178,20 +160,14 @@ static void prp_dup_discard_lan_b_late(struct kunit *test) /* LAN B is behind */ struct prp_test_data *data = build_prp_test_data(test); - data->node.seq_out[HSR_PT_MASTER] = 8; - data->frame.sequence_nr = 9; KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, - data->node.seq_out[HSR_PT_MASTER]); check_prp_frame_seen(test, data, data->frame.sequence_nr); data->frame.sequence_nr = 10; KUNIT_EXPECT_EQ(test, 0, prp_register_frame_out(&data->port, &data->frame)); - KUNIT_EXPECT_EQ(test, data->frame.sequence_nr, - data->node.seq_out[HSR_PT_MASTER]); check_prp_frame_seen(test, data, data->frame.sequence_nr); data->frame.sequence_nr = 9; -- 2.52.0 Run the packet loss and reordering tests also for both HSR versions. Now they can be removed from the hsr_ping tests completely. The timeout needs to be increased because there are 15 link fault test cases now, with each of them taking 5-6sec for the test and at most 5sec for the HSR node tables to get merged and we also want some room to make the test runs stable. Signed-off-by: Felix Maurer Reviewed-by: Sebastian Andrzej Siewior --- tools/testing/selftests/net/hsr/hsr_ping.sh | 32 +++------------- .../testing/selftests/net/hsr/link_faults.sh | 38 +++++++++++++++++++ tools/testing/selftests/net/hsr/settings | 2 +- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 0ec71b20ab75..f4d685df4345 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -90,27 +90,6 @@ do_ping_tests() stop_if_error "Longer ping test failed (ns3)." } -do_link_problem_tests() -{ - echo "INFO: Running link problem tests." - - echo "INFO: Delay the link and drop a few packages." - tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms - tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% - - do_ping_long "$ns1" 100.64.0.2 - do_ping_long "$ns1" 100.64.0.3 - stop_if_error "Failed with delay and packetloss (ns1)." - - do_ping_long "$ns2" 100.64.0.1 - do_ping_long "$ns2" 100.64.0.3 - stop_if_error "Failed with delay and packetloss (ns2)." - - do_ping_long "$ns3" 100.64.0.1 - do_ping_long "$ns3" 100.64.0.2 - stop_if_error "Failed with delay and packetloss (ns3)." -} - setup_hsr_interfaces() { local HSRv="$1" @@ -190,11 +169,10 @@ setup_vlan_interfaces() { } -run_complete_ping_tests() +run_ping_tests() { - echo "INFO: Running complete ping tests." + echo "INFO: Running ping tests." do_ping_tests 0 - do_link_problem_tests } run_vlan_tests() @@ -204,7 +182,7 @@ run_vlan_tests() vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}') if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then - echo "INFO: Running VLAN tests" + echo "INFO: Running VLAN ping tests" setup_vlan_interfaces do_ping_tests 2 else @@ -217,12 +195,12 @@ trap cleanup_all_ns EXIT setup_ns ns1 ns2 ns3 setup_hsr_interfaces 0 -run_complete_ping_tests +run_ping_tests run_vlan_tests setup_ns ns1 ns2 ns3 setup_hsr_interfaces 1 -run_complete_ping_tests +run_ping_tests run_vlan_tests exit $ret diff --git a/tools/testing/selftests/net/hsr/link_faults.sh b/tools/testing/selftests/net/hsr/link_faults.sh index 1959bea17147..be526281571c 100755 --- a/tools/testing/selftests/net/hsr/link_faults.sh +++ b/tools/testing/selftests/net/hsr/link_faults.sh @@ -7,8 +7,16 @@ source ../lib.sh ALL_TESTS=" test_clean_hsrv0 test_cut_link_hsrv0 + test_packet_loss_hsrv0 + test_high_packet_loss_hsrv0 + test_reordering_hsrv0 + test_clean_hsrv1 test_cut_link_hsrv1 + test_packet_loss_hsrv1 + test_high_packet_loss_hsrv1 + test_reordering_hsrv1 + test_clean_prp test_cut_link_prp test_packet_loss_prp @@ -292,11 +300,31 @@ test_packet_loss() log_test "${tname}" } +test_packet_loss_hsrv0() +{ + test_packet_loss "HSRv0" "20%" +} + +test_packet_loss_hsrv1() +{ + test_packet_loss "HSRv1" "20%" +} + test_packet_loss_prp() { test_packet_loss "PRP" "20%" } +test_high_packet_loss_hsrv0() +{ + test_packet_loss "HSRv0" "80%" +} + +test_high_packet_loss_hsrv1() +{ + test_packet_loss "HSRv1" "80%" +} + test_high_packet_loss_prp() { test_packet_loss "PRP" "80%" @@ -323,6 +351,16 @@ test_reordering() log_test "${tname}" } +test_reordering_hsrv0() +{ + test_reordering "HSRv0" +} + +test_reordering_hsrv1() +{ + test_reordering "HSRv1" +} + test_reordering_prp() { test_reordering "PRP" diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings index ba4d85f74cd6..a953c96aa16e 100644 --- a/tools/testing/selftests/net/hsr/settings +++ b/tools/testing/selftests/net/hsr/settings @@ -1 +1 @@ -timeout=90 +timeout=180 -- 2.52.0 Despite the HSR subsystem being orphaned at the moment due to the original maintainer being unreachable for a while, assign the selftests to the subsystem for the future. Signed-off-by: Felix Maurer Reviewed-by: Sebastian Andrzej Siewior --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0caa8aee5840..d50ed2f88ff7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11586,6 +11586,7 @@ HSR NETWORK PROTOCOL L: netdev@vger.kernel.org S: Orphan F: net/hsr/ +F: tools/testing/selftests/net/hsr/ HT16K33 LED CONTROLLER DRIVER M: Robin van der Gracht -- 2.52.0