Add VLAN TCI formatting and parsing support to ovs-dpctl.py: - Add _vlan_dpstr() to decompose TCI into vid/pcp/cfi fields, with raw tci=0x%04x fallback when cfi=0 for round-trip safety. - Add _parse_vlan_from_flowstr() boundary check for missing ')'. - Add encap_ovskey subclass restricting nla_map to L2-L4 attributes (slots 0-21) that appear inside 802.1Q ENCAP, with metadata attributes set to "none". - Check parse() return value for unrecognized trailing content. - Support callable format functions in dpstr() output. Signed-off-by: Minxi Hou --- .../selftests/net/openvswitch/ovs-dpctl.py | 267 +++++++++++++++++- 1 file changed, 259 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 848f61fdcee0..87b1ab7bf201 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -901,11 +901,11 @@ class ovskey(nla): nla_flags = NLA_F_NESTED nla_map = ( ("OVS_KEY_ATTR_UNSPEC", "none"), - ("OVS_KEY_ATTR_ENCAP", "none"), + ("OVS_KEY_ATTR_ENCAP", "encap_ovskey"), ("OVS_KEY_ATTR_PRIORITY", "uint32"), ("OVS_KEY_ATTR_IN_PORT", "uint32"), ("OVS_KEY_ATTR_ETHERNET", "ethaddr"), - ("OVS_KEY_ATTR_VLAN", "uint16"), + ("OVS_KEY_ATTR_VLAN", "be16"), ("OVS_KEY_ATTR_ETHERTYPE", "be16"), ("OVS_KEY_ATTR_IPV4", "ovs_key_ipv4"), ("OVS_KEY_ATTR_IPV6", "ovs_key_ipv6"), @@ -1636,6 +1636,204 @@ class ovskey(nla): class ovs_key_mpls(nla): fields = (("lse", ">I"),) + # 802.1Q CFI (Canonical Format Indicator) bit, always set for Ethernet + _VLAN_CFI_MASK = 0x1000 + _MAX_ENCAP_DEPTH = 4 + _encap_depth = 0 # single-threaded usage assumed + + @staticmethod + def _vlan_dpstr(tci): + """Format VLAN TCI as vid=X,pcp=Y,cfi=Z or tci=0xNNNN. + + When cfi=1 (standard Ethernet VLAN), outputs decomposed + vid/pcp/cfi fields. When cfi=0 (truncated VLAN header), + falls back to raw tci=0x%04x to ensure round-trip + correctness: the parser auto-adds cfi=1 for vid/pcp + format, so cfi=0 would be lost on re-parse.""" + vid = tci & 0x0FFF + pcp = (tci >> 13) & 0x7 + cfi = (tci >> 12) & 0x1 + if cfi: + return "vid=%d,pcp=%d,cfi=%d" % (vid, pcp, cfi) + return "tci=0x%04x" % tci + + @staticmethod + def _parse_vlan_from_flowstr(flowstr): + """Parse vlan(tci=X) or vlan(vid=X[,pcp=Y,cfi=Z]) from flowstr. + + Returns (remaining_flowstr, key_tci, mask_tci). + TCI values use standard bit layout (VID bits 0-11, + CFI bit 12, PCP bits 13-15); byte order conversion to + big-endian happens in pyroute2 be16 NLA serialization. + The mask covers only the fields the caller specified: + vid -> 0x0FFF, pcp -> 0xE000, cfi -> 0x1000, tci -> 0xFFFF. + + The tci= key sets the raw TCI bitfield (no CFI validation) to allow + non-Ethernet use cases. Use cfi=1 for standard Ethernet VLAN matching. + """ + tci = 0 + mask = 0 + has_tci = False + has_vid = has_pcp = has_cfi = False + _tci_mix_err = "vlan(): 'tci' cannot be mixed " \ + "with 'vid'/'pcp'/'cfi'" + first = True + while True: + flowstr = flowstr.lstrip() + if not flowstr: + raise ValueError("vlan(): missing ')'") + if flowstr[0] == ')': + break + if not first: + flowstr = flowstr[1:] # skip ',' + if not flowstr: + raise ValueError("vlan(): missing ')' after trailing comma") + flowstr = flowstr.lstrip() + if flowstr and flowstr[0] == ')': + break + if flowstr and flowstr[0] == ',': + raise ValueError( + "vlan(): empty or extra comma in field list") + first = False + + eq = flowstr.find('=') + if eq == -1: + raise ValueError( + "vlan(): expected key=value, got '%s'" % flowstr) + key = flowstr[:eq].strip() + flowstr = flowstr[eq + 1:] + + end = flowstr.find(',') + end2 = flowstr.find(')') + if end == -1 and end2 == -1: + raise ValueError("vlan(): missing ')'") + if end == -1 or (end2 != -1 and end2 < end): + end = end2 + val = flowstr[:end].strip() + flowstr = flowstr[end:] + + if not val: + raise ValueError("vlan(): empty value for key '%s'" % key) + try: + v = int(val, 16) if val.startswith(('0x', '0X')) else int(val) + except ValueError: + raise ValueError("vlan(): invalid value '%s' for key '%s'" % + (val, key)) + + if key == 'tci': + if has_tci: + raise ValueError("vlan(): duplicate 'tci'") + if has_vid or has_pcp or has_cfi: + raise ValueError(_tci_mix_err) + if v > 0xFFFF or v < 0: + raise ValueError("vlan(): tci=0x%x out of range" % v) + tci = v + mask = 0xFFFF + has_tci = True + elif key == 'vid': + if has_tci: + raise ValueError(_tci_mix_err) + if has_vid: + raise ValueError("vlan(): duplicate 'vid'") + if v < 0 or v > 0xFFF: + raise ValueError("vlan(): vid=%d out of range (0-4095)" % v) + tci |= v + mask |= 0x0FFF + has_vid = True + elif key == 'pcp': + if has_tci: + raise ValueError(_tci_mix_err) + if has_pcp: + raise ValueError("vlan(): duplicate 'pcp'") + if v < 0 or v > 7: + raise ValueError("vlan(): pcp=%d out of range (0-7)" % v) + tci |= (v & 0x7) << 13 + mask |= 0xE000 + has_pcp = True + elif key == 'cfi': + if has_tci: + raise ValueError(_tci_mix_err) + if has_cfi: + raise ValueError("vlan(): duplicate 'cfi'") + if v != 1: + raise ValueError("vlan(): cfi must be 1 for Ethernet") + tci |= ovskey._VLAN_CFI_MASK + mask |= ovskey._VLAN_CFI_MASK + has_cfi = True + else: + raise ValueError("vlan(): unknown key '%s'" % key) + + flowstr = flowstr[1:] # skip ')' + # Catch immediate '))' (user error). A ')' after ',' is consumed + # by parse()'s strspn(flowstr, "), ") inter-field separator stripping. + if flowstr.lstrip().startswith(')'): + raise ValueError("vlan(): unmatched ')'") + # parse() strips trailing ',', ')', ' ' as inter-field separators, + # so we do not need to call strspn here. + + if mask == 0: + raise ValueError("vlan(): no fields specified, " + "use vlan(vid=X[,pcp=Y,cfi=Z]) or vlan(tci=X)") + if not has_tci: + tci |= ovskey._VLAN_CFI_MASK + mask |= ovskey._VLAN_CFI_MASK + return flowstr, tci, mask + + @staticmethod + def _parse_encap_from_flowstr(flowstr): + """Parse encap(inner_flow) from flowstr. + + Returns (remaining_flowstr, inner_key_dict, inner_mask_dict) + where each dict has an 'attrs' key for recursive NLA encoding. + Parenthesis-depth tracking handles nested encap() calls but not + quoted strings containing literal parentheses. + """ + if ovskey._encap_depth >= ovskey._MAX_ENCAP_DEPTH: + raise ValueError("encap(): max nesting depth %d exceeded" % + ovskey._MAX_ENCAP_DEPTH) + try: + ovskey._encap_depth += 1 + depth = 1 + end = -1 + for i, c in enumerate(flowstr): + if c == '(': + depth += 1 + elif c == ')': + depth -= 1 + if depth < 0: + raise ValueError( + "encap(): unmatched ')' at position %d" % i) + if depth == 0: + end = i + break + + if end == -1: + if depth > 1: + raise ValueError("encap(): missing ')' at end") + raise ValueError("encap(): missing closing ')'") + + inner_str = flowstr[:end].strip() + if not inner_str: + raise ValueError("encap(): empty inner flow") + + flowstr = flowstr[end + 1:] + if flowstr.lstrip().startswith(')'): + raise ValueError("encap(): unmatched ')' after encap()") + # parse() strips trailing ',', ')', ' ' as inter-field separators, + # so we do not need to call strspn here. + + inner_key = encap_ovskey() + inner_mask = encap_ovskey() + remaining = inner_key.parse(inner_str, inner_mask) + if remaining and re.search(r'[^\s,)]', remaining): + raise ValueError( + "encap(): unrecognized trailing " + "content '%s'" % remaining.strip()) + + return flowstr, inner_key, inner_mask + finally: + ovskey._encap_depth -= 1 + def parse(self, flowstr, mask=None): for field in ( ("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse), @@ -1657,6 +1855,16 @@ class ovskey(nla): "eth_type", lambda x: intparse(x, "0xffff"), ), + ( + "OVS_KEY_ATTR_VLAN", + "vlan", + ovskey._parse_vlan_from_flowstr, + ), + ( + "OVS_KEY_ATTR_ENCAP", + "encap", + ovskey._parse_encap_from_flowstr, + ), ( "OVS_KEY_ATTR_IPV4", "ipv4", @@ -1794,6 +2002,9 @@ class ovskey(nla): True, ), ("OVS_KEY_ATTR_ETHERNET", None, None, False, False), + ("OVS_KEY_ATTR_VLAN", "vlan", ovskey._vlan_dpstr, + lambda x: False, True), + ("OVS_KEY_ATTR_ENCAP", None, None, False, False), ( "OVS_KEY_ATTR_ETHERTYPE", "eth_type", @@ -1821,22 +2032,61 @@ class ovskey(nla): v = self.get_attr(field[0]) if v is not None: m = None if mask is None else mask.get_attr(field[0]) + fmt = field[2] # str format or callable if field[4] is False: print_str += v.dpstr(m, more) print_str += "," else: if m is None or field[3](m): - print_str += field[1] + "(" - print_str += field[2] % v - print_str += ")," + val = fmt(v) if callable(fmt) else fmt % v + print_str += field[1] + "(" + val + ")," elif more or m != 0: - print_str += field[1] + "(" - print_str += (field[2] % v) + "/" + (field[2] % m) - print_str += ")," + if callable(fmt): + val = fmt(v) + "/" + fmt(m) + else: + val = (fmt % v) + "/" + (fmt % m) + print_str += field[1] + "(" + val + ")," return print_str +class encap_ovskey(ovskey): + """Inner flow key attributes valid inside 802.1Q ENCAP. + + Only L2-L4 key attributes (slots 0-21) appear inside ENCAP. + Metadata-only attributes (SKB_MARK, DP_HASH, RECIRC_ID, etc.) + are set to "none" — they never appear inside ENCAP per + ovs_nla_put_vlan() in net/openvswitch/flow_netlink.c. + + nla_map indexes must match OVS_KEY_ATTR_* enum values in + include/uapi/linux/openvswitch.h. + """ + nla_map = ( + ("OVS_KEY_ATTR_UNSPEC", "none"), # 0 + ("OVS_KEY_ATTR_ENCAP", "none"), # 1 — placeholder, no recursion + ("OVS_KEY_ATTR_PRIORITY", "none"), # 2 — skb metadata, not in ENCAP + ("OVS_KEY_ATTR_IN_PORT", "none"), # 3 — skb metadata, not in ENCAP + ("OVS_KEY_ATTR_ETHERNET", "ethaddr"), # 4 + ("OVS_KEY_ATTR_VLAN", "be16"), # 5 + ("OVS_KEY_ATTR_ETHERTYPE", "be16"), # 6 + ("OVS_KEY_ATTR_IPV4", "ovs_key_ipv4"), # 7 + ("OVS_KEY_ATTR_IPV6", "ovs_key_ipv6"), # 8 + ("OVS_KEY_ATTR_TCP", "ovs_key_tcp"), # 9 + ("OVS_KEY_ATTR_UDP", "ovs_key_udp"), # 10 + ("OVS_KEY_ATTR_ICMP", "ovs_key_icmp"), # 11 + ("OVS_KEY_ATTR_ICMPV6", "ovs_key_icmpv6"), # 12 + ("OVS_KEY_ATTR_ARP", "ovs_key_arp"), # 13 + ("OVS_KEY_ATTR_ND", "ovs_key_nd"), # 14 + ("OVS_KEY_ATTR_SKB_MARK", "none"), # 15 — metadata, not in ENCAP + ("OVS_KEY_ATTR_TUNNEL", "none"), # 16 — tunnel metadata, not in ENCAP + ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"), # 17 + ("OVS_KEY_ATTR_TCP_FLAGS", "be16"), # 18 + ("OVS_KEY_ATTR_DP_HASH", "none"), # 19 — metadata, not in ENCAP + ("OVS_KEY_ATTR_RECIRC_ID", "none"), # 20 — metadata, not in ENCAP + ("OVS_KEY_ATTR_MPLS", "array(ovs_key_mpls)"), # 21 + ) + + class OvsPacket(GenericNetlinkSocket): OVS_PACKET_CMD_MISS = 1 # Flow table miss OVS_PACKET_CMD_ACTION = 2 # USERSPACE action @@ -2576,6 +2826,7 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): def main(argv): + nlmsg_atoms.encap_ovskey = encap_ovskey nlmsg_atoms.ovskey = ovskey nlmsg_atoms.ovsactions = ovsactions -- 2.53.0 Add test_pop_vlan() to verify OVS kernel datapath pop_vlan action correctly strips 802.1Q VLAN tags from frames. Test structure: - Baseline: untagged forwarding validates basic connectivity. - Negative: forward without pop_vlan, assert VLAN tag preserved. - Positive: forward with pop_vlan, assert tag stripped and untagged ICMP echo request arrives. Add start_capture/stop_capture helpers using ovs_wait for deterministic tcpdump readiness instead of ad-hoc sleep. Signed-off-by: Minxi Hou --- .../selftests/net/openvswitch/openvswitch.sh | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index b327d3061ed5..7c937b48d9b6 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -27,6 +27,7 @@ tests=" upcall_interfaces ovs: test the upcall interfaces tunnel_metadata ovs: test extraction of tunnel metadata drop_reason drop: test drop reasons are emitted + pop_vlan vlan-pop: POP_VLAN action strips 802.1Q tag psample psample: Sampling packets with psample" info() { @@ -830,6 +831,197 @@ test_tunnel_metadata() { return 0 } +# Start tcpdump capture with deterministic readiness wait. +# Usage: start_capture +# $4 and $5 are variable NAMES — start_capture writes the tcpdump PID +# and log path into those caller variables via nameref (bash 4.3+). +# Contract: caller MUST call stop_capture with the returned PID and log +# before returning from the function. +start_capture() { + local ns="$1" iface="$2" pcap="$3" + local -n _out_pid="$4" + local -n _out_log="$5" + local log pid + + command -v tcpdump >/dev/null 2>&1 || { + info "tcpdump missing" + return $ksft_skip + } + + log=$(mktemp) + ip netns exec "$ns" tcpdump -nei "$iface" \ + -w "$pcap" -U 2>"$log" & + pid=$! + ovs_wait grep -q "listening on" "$log" || { + kill $pid 2>/dev/null + wait $pid 2>/dev/null + rm -f "$log" + info "FAIL: tcpdump failed to start on $iface" + return 1 + } + kill -0 $pid 2>/dev/null || { + wait $pid 2>/dev/null + rm -f "$log" + info "FAIL: tcpdump died after start on $iface" + return 1 + } + # $pid/$log expand now (intentional — captures concrete values) + on_exit "kill $pid 2>/dev/null; rm -f $log" + _out_pid=$pid + _out_log=$log +} + +# Stop capture and cleanup temp files. +# Usage: stop_capture +stop_capture() { + kill "$1" 2>/dev/null + wait "$1" 2>/dev/null + rm -f "$2" +} + +test_pop_vlan() { + modprobe -q openvswitch 2>/dev/null || true + [ -d /sys/module/openvswitch ] || return $ksft_skip + local ns_err + ns_err=$(mktemp) + if ! ip netns add __test_pop_vlan_netns__ 2>"$ns_err"; then + if grep -q "File exists" "$ns_err"; then + ip netns del __test_pop_vlan_netns__ 2>/dev/null + else + info "CONFIG_NET_NS missing or unavailable" + rm -f "$ns_err" + return $ksft_skip + fi + fi + ip netns del __test_pop_vlan_netns__ 2>/dev/null + rm -f "$ns_err" + modprobe -q 8021q 2>/dev/null || true + [ -d /sys/module/8021q ] || \ + { info "CONFIG_VLAN_8021Q missing"; return $ksft_skip; } + + local sbx="test_pop_vlan" + sbx_add "$sbx" || return $? + ovs_add_dp "$sbx" vlandp || return 1 + + # Validate basic connectivity before testing pop_vlan. + # --- baseline: untagged forwarding --- + ovs_add_netns_and_veths "$sbx" vlandp \ + ns1 veth1 ns1veth 192.0.2.1/24 || return 1 + ovs_add_netns_and_veths "$sbx" vlandp \ + ns2 veth2 ns2veth 192.0.2.2/24 || return 1 + + # ARP + IPv4 bidirectional (all untagged) + ovs_add_flow "$sbx" vlandp \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "$sbx" vlandp \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + ovs_add_flow "$sbx" vlandp \ + 'in_port(1),eth(),eth_type(0x0800),ipv4()' '2' || return 1 + ovs_add_flow "$sbx" vlandp \ + 'in_port(2),eth(),eth_type(0x0800),ipv4()' '1' || return 1 + ip netns exec ns1 ping -c 3 -W 2 192.0.2.2 || return 1 + + # --- POP_VLAN test --- + # ns1: VLAN sub-interface generates tagged frames + ip -n ns1 link add link ns1veth name ns1veth.10 \ + type vlan id 10 || return 1 + on_exit "ip -n ns1 link del ns1veth.10 2>/dev/null || true" + ip -n ns1 addr add 198.51.100.1/24 dev ns1veth.10 || return 1 + ip -n ns1 link set ns1veth.10 up || return 1 + + # ns2: no VLAN sub-interface. POP delivers untagged frames to ns2veth + ip -n ns2 addr add 198.51.100.2/24 dev ns2veth || return 1 + on_exit "ip -n ns2 addr del 198.51.100.2/24 dev ns2veth 2>/dev/null || true" + + # veth disable VLAN offload + GRO (ensure kernel software tag processing) + if command -v ethtool >/dev/null 2>&1; then + ip netns exec ns1 ethtool -k ns1veth 2>/dev/null | grep -q vlan-offload && \ + ip netns exec ns1 ethtool -K ns1veth rx-vlan-offload off \ + tx-vlan-offload off gro off 2>/dev/null || true + ip netns exec ns2 ethtool -k ns2veth 2>/dev/null | grep -q vlan-offload && \ + ip netns exec ns2 ethtool -K ns2veth rx-vlan-offload off \ + tx-vlan-offload off gro off 2>/dev/null || true + fi + + ovs_del_flows "$sbx" vlandp + + # Static ARP avoids VLAN-tagged ARP complexity (ns2 has no VLAN + # sub-interface, so tagged ARP would be invisible to ns2). + local ns1veth10mac ns2mac + ns1veth10mac=$(ip -n ns1 link show ns1veth.10 | \ + awk '/link\/ether/ {print $2}') + ns2mac=$(ip -n ns2 link show ns2veth | \ + awk '/link\/ether/ {print $2}') + [ -n "$ns1veth10mac" ] && echo "$ns1veth10mac" | \ + grep -qE "^([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}$" || return 1 + [ -n "$ns2mac" ] && echo "$ns2mac" | \ + grep -qE "^([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}$" || return 1 + ip -n ns1 neigh replace 198.51.100.2 lladdr "$ns2mac" \ + dev ns1veth.10 nud permanent || return 1 + ip -n ns2 neigh replace 198.51.100.1 lladdr "$ns1veth10mac" \ + dev ns2veth nud permanent || return 1 + + # --- Negative check: fwd without pop_vlan, VLAN tag stays --- + local vlan_match='in_port(1),eth(),eth_type(0x8100),' + vlan_match+='vlan(vid=10),' + vlan_match+='encap(eth_type(0x0800),' + vlan_match+='ipv4(src=198.51.100.1,proto=1),icmp())' + ovs_add_flow "$sbx" vlandp "$vlan_match" '2' || return 1 + + local pcap_no_pop + pcap_no_pop=$(mktemp --suffix=.pcap) + on_exit "rm -f $pcap_no_pop" + local tpid tlog + start_capture ns2 ns2veth "$pcap_no_pop" tpid tlog || return $? + + ip netns exec ns1 ping -I ns1veth.10 -c 3 -W 1 198.51.100.2 \ + >/dev/null 2>&1 || true + stop_capture "$tpid" "$tlog" + + # assert: VLAN tag still present (no pop_vlan in action) + tcpdump -nr "$pcap_no_pop" 'vlan' 2>/dev/null | grep -q . || { + info "FAIL: negative check: no VLAN tag (expected tag present)" + return 1 + } + + ovs_del_flows "$sbx" vlandp + + # --- Positive: pop_vlan strips tag --- + ovs_add_flow "$sbx" vlandp "$vlan_match" 'pop_vlan,2' || return 1 + ovs_add_flow "$sbx" vlandp \ + 'in_port(2),eth(),eth_type(0x0800),ipv4()' '1' || return 1 + + local pcap + pcap=$(mktemp --suffix=.pcap) + on_exit "rm -f $pcap" + local tpid2 tlog2 + start_capture ns2 ns2veth "$pcap" tpid2 tlog2 || return $? + + # ns1veth.10 only accepts tagged frames; + # ns2 sends untagged reply → dropped by ns1 + local ping_rc=0 + ip netns exec ns1 ping -I ns1veth.10 -c 3 -W 1 198.51.100.2 \ + >/dev/null 2>&1 || ping_rc=$? + stop_capture "$tpid2" "$tlog2" + + # ping failure is expected (reply path asymmetric) + [ "$ping_rc" -ne 0 ] || { + info "FAIL: ping succeeded unexpectedly" + return 1 + } + + # assert: no VLAN tag (POP succeeded), untagged ICMP echo request arrived + tcpdump -nr "$pcap" 'vlan' 2>/dev/null | grep -q . && { + info "FAIL: POP_VLAN: VLAN tag still present"; return 1 + } + tcpdump -nr "$pcap" 'icmp and icmp[icmptype]=8' \ + 2>/dev/null | grep -q . || { + info "FAIL: POP_VLAN: no untagged ICMP echo request"; return 1 + } + + return 0 +} + run_test() { ( tname="$1" -- 2.53.0