io_uring defers zcrx context teardown to the iou_exit workqueue. # ps aux | grep iou ... 07:58 0:00 [kworker/u19:0-iou_exit] ... 07:58 0:00 [kworker/u18:2-iou_exit] When the test's receiver process exits, bkg() returns, but the memory provider may still be attached to the rx queue. The subsequent defer() that restores tcp-data-split then fails: # Exception while handling defer / cleanup (callback 3 of 3)! # Defer Exception| net.ynl.pyynl.lib.ynl.NlError: Netlink error: can't disable tcp-data-split while device has memory provider enabled: Invalid argument not ok 1 iou-zcrx.test_zcrx.single Add a helper that polls netdev queue-get until no rx queue reports the io-uring memory provider attribute. Register it with defer() as a "barrier" that runs just before tcp-data-split is restored. Signed-off-by: Jakub Kicinski --- CC: shuah@kernel.org CC: dw@davidwei.uk CC: jdamato@fastly.com CC: linux-kselftest@vger.kernel.org --- .../selftests/drivers/net/hw/iou-zcrx.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index c63d6d6450d2..c27c2064701d 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -2,14 +2,27 @@ # SPDX-License-Identifier: GPL-2.0 import re +import time from os import path from lib.py import ksft_run, ksft_exit, KsftSkipEx, ksft_variants, KsftNamedVariant from lib.py import NetDrvEpEnv from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen -from lib.py import EthtoolFamily +from lib.py import EthtoolFamily, NetdevFamily SKIP_CODE = 42 + +def mp_clear_wait(cfg): + """Wait for io_uring memory providers to clear from all device queues.""" + deadline = time.time() + 5 + while time.time() < deadline: + queues = cfg.netnl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not any('io-uring' in q for q in queues): + return + time.sleep(0.1) + raise 
TimeoutError("Timed out waiting for memory provider to clear") + + def create_rss_ctx(cfg): output = ethtool(f"-X {cfg.ifname} context new start {cfg.target} equal 1").stdout values = re.search(r'New RSS context is (\d+)', output).group(1) @@ -46,6 +59,7 @@ SKIP_CODE = 42 'tcp-data-split': 'unknown', 'hds-thresh': hds_thresh, 'rx': rx_rings}) + defer(mp_clear_wait, cfg) cfg.target = channels - 1 ethtool(f"-X {cfg.ifname} equal {cfg.target}") @@ -73,6 +87,7 @@ SKIP_CODE = 42 'tcp-data-split': 'unknown', 'hds-thresh': hds_thresh, 'rx': rx_rings}) + defer(mp_clear_wait, cfg) cfg.target = channels - 1 ethtool(f"-X {cfg.ifname} equal {cfg.target}") @@ -159,6 +174,7 @@ SKIP_CODE = 42 cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() cfg.port = rand_port() ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot], args=(cfg, )) ksft_exit() -- 2.53.0 Commit a32bb32d0193 ("selftests: iou-zcrx: test large chunk sizes") and commit de7c600e2d5b ("selftests/net: parametrise iou-zcrx.py with ksft_variants") landed at around the same time. The large chunks test was actually not included in the list of tests, so it never ran. We did not notice that it uses the old-style helpers (_get_combined_channels, _get_current_settings, _set_flow_rule) that were removed by the other commit. Rework test_zcrx_large_chunks to reuse the single() setup function and add it to the ksft_run cases list so it actually gets executed. 
Signed-off-by: Jakub Kicinski --- CC: shuah@kernel.org CC: dw@davidwei.uk CC: jdamato@fastly.com CC: linux-kselftest@vger.kernel.org --- .../selftests/drivers/net/hw/iou-zcrx.py | 31 ++++--------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index c27c2064701d..1649c23e05e2 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -135,36 +135,16 @@ SKIP_CODE = 42 cfg.require_ipver('6') - combined_chans = _get_combined_channels(cfg) - if combined_chans < 2: - raise KsftSkipEx('at least 2 combined channels required') - (rx_ring, hds_thresh) = _get_current_settings(cfg) - port = rand_port() - - ethtool(f"-G {cfg.ifname} tcp-data-split on") - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - - ethtool(f"-G {cfg.ifname} hds-thresh 0") - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - - ethtool(f"-G {cfg.ifname} rx 64") - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") - defer(ethtool, f"-X {cfg.ifname} default") - - flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - - rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -x 2" - tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + single(cfg) + rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -x 2" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840" probe = cmd(rx_cmd + " -d", fail=False) if probe.ret == SKIP_CODE: raise KsftSkipEx(probe.stdout) with bkg(rx_cmd, exit_wait=True): - wait_port_listen(port, proto="tcp") + wait_port_listen(cfg.port, proto="tcp") cmd(tx_cmd, host=cfg.remote) @@ -176,7 +156,8 @@ SKIP_CODE = 42 cfg.ethnl = EthtoolFamily() cfg.netnl = NetdevFamily() cfg.port = 
rand_port() - ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot], args=(cfg, )) + ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot, + test_zcrx_large_chunks], args=(cfg, )) ksft_exit() -- 2.53.0 The large chunks test needs 2MB hugepages for its mmap allocation, but the test system may not have any pre-allocated. Ensure at least 64 hugepages are available before running the test, and restore the original value on cleanup. While at it, strip the stdout, as it has a trailing newline. Before: ok 5 iou-zcrx.test_zcrx_large_chunks # SKIP Can't allocate huge pages Signed-off-by: Jakub Kicinski --- CC: shuah@kernel.org CC: dw@davidwei.uk CC: jdamato@fastly.com CC: linux-kselftest@vger.kernel.org --- tools/testing/selftests/drivers/net/hw/iou-zcrx.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 1649c23e05e2..66dd496ec5cf 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -135,13 +135,21 @@ SKIP_CODE = 42 cfg.require_ipver('6') + hp_file = "/proc/sys/vm/nr_hugepages" + with open(hp_file, 'r+', encoding='utf-8') as f: + nr_hugepages = int(f.read().strip()) + if nr_hugepages < 64: + f.seek(0) + f.write("64") + defer(lambda: open(hp_file, 'w', encoding='utf-8').write(str(nr_hugepages))) + single(cfg) rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -x 2" tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840" probe = cmd(rx_cmd + " -d", fail=False) if probe.ret == SKIP_CODE: - raise KsftSkipEx(probe.stdout) + raise KsftSkipEx(probe.stdout.strip()) with bkg(rx_cmd, exit_wait=True): wait_port_listen(cfg.port, proto="tcp") -- 2.53.0