From: Chia-Yu Chang This patch updates the documentation of ECN-related GSO flags. It clarifies the limitations of SKB_GSO_TCP_ECN and explains how to preserve the CWR flag (part of the ACE signal) in the Rx path. For Tx, SKB_GSO_TCP_ECN and SKB_GSO_TCP_ACCECN are used respectively for RFC3168 ECN and AccECN (RFC9768). SKB_GSO_TCP_ECN indicates that the first segment has CWR set, while subsequent segments have CWR cleared. In contrast, SKB_GSO_TCP_ACCECN means that the segment uses AccECN and therefore its CWR flag must not be modified during segmentation. For RX, SKB_GSO_TCP_ECN shall NOT be used, because the stack cannot know whether the connection uses RFC3168 ECN or AccECN, whereas RFC3168 ECN offload may clear the CWR flag and thus corrupt the ACE signal. Instead, any segment that arrives with CWR set must use the SKB_GSO_TCP_ACCECN flag to prevent RFC3168 ECN offload logic from clearing the CWR flag. Co-developed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Chia-Yu Chang --- v3: - Update commit messages and documentation for clarity --- include/linux/skbuff.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8b399ddf1b9b..c59f0ce414d9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -671,7 +671,13 @@ enum { /* This indicates the skb is from an untrusted source. */ SKB_GSO_DODGY = 1 << 1, - /* This indicates the tcp segment has CWR set. */ + /* For Tx, this indicates that the first TCP segment has CWR set, and + * any subsequent segment in the same skb has CWR cleared. This flag + * must not be used in Rx, because the connection to which the segment + * belongs is not tracked to use RFC3168 or AccECN. Using RFC3168 ECN + * offload may clear CWR and corrupt ACE signal (CWR is part of it). + * Instead, SKB_GSO_TCP_ACCECN shall be used to avoid CWR corruption. 
+ */ SKB_GSO_TCP_ECN = 1 << 2, __SKB_GSO_TCP_FIXEDID = 1 << 3, @@ -706,6 +712,13 @@ enum { SKB_GSO_FRAGLIST = 1 << 18, + /* For TX, this indicates that the TCP segment uses the CWR flag as part + * of the ACE signal, and the CWR flag must not be modified in the skb. + * For RX, any incoming segment with CWR set must use this flag so that + * no RFC3168 ECN offload can clear the CWR flag. This is essential for + * preserving ACE signal correctness (CWR is part of it) in a forwarding + * scenario, e.g., from virtio_net RX to GSO TX. + */ SKB_GSO_TCP_ACCECN = 1 << 19, /* These indirectly map onto the same netdev feature. -- 2.34.1 From: Chia-Yu Chang Currently, hns3 and mlx5 Rx paths use the SKB_GSO_TCP_ECN flag when a TCP segment has the CWR flag set. This is wrong because SKB_GSO_TCP_ECN is only valid for RFC3168 ECN on Tx, and using it on Rx allows RFC3168 ECN offload to clear the CWR flag. As a result, incoming TCP segments lose their ACE signal integrity required for AccECN (RFC9768), especially when the packet is forwarded and later re-segmented by GSO. Fix this by setting SKB_GSO_TCP_ACCECN for any Rx segment with the CWR flag set. SKB_GSO_TCP_ACCECN ensures that RFC3168 ECN offload will not clear the CWR flag, therefore preserving the ACE signal. 
Signed-off-by: Chia-Yu Chang --- v3: - Rewrite the commit message for clarity --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a47464a22751..3a1cf4335477 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3897,7 +3897,7 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ACCECN; if (l234info & BIT(HNS3_RXD_GRO_FIXID_B)) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1fc3720d2201..d174f83478a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1311,7 +1311,7 @@ static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr * skb->csum_offset = offsetof(struct tcphdr, check); if (tcp->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ACCECN; } static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6, @@ -1332,7 +1332,7 @@ static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr skb->csum_offset = offsetof(struct tcphdr, check); if (tcp->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ACCECN; } static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match) -- 2.34.1 From: Chia-Yu Chang virtio_net currently negotiates ECN-related capabilities through VIRTIO_NET_F_HOST_ECN and VIRTIO_NET_F_GUEST_ECN. 
This is not sufficient for flows using AccECN (RFC9768), because AccECN requires preserving the ACE signal (CWR flag is part of it) across GSO operations. Without explicit AccECN capability bits, the device and driver may treat AccECN traffic using the RFC3168 ECN offload logic, causing the CWR flag to be cleared. As a result, AccECN segments may lose their ACE signal integrity. Fix this by adding new AccECN capability bits for negotiation between host and guest: VIRTIO_NET_F_HOST_ACCECN and VIRTIO_NET_F_GUEST_ACCECN. In addition, translate the AccECN GSO flag correctly between the virtio header (VIRTIO_NET_HDR_GSO_ACCECN) and skb metadata (SKB_GSO_TCP_ACCECN) to ensure correct ACE signal preservation between virtio_net and the socket stack. This corresponds to discussions on the virtio mailing list: https://lore.kernel.org/all/20250814120118.81787-1-chia-yu.chang@nokia-bell-labs.com/ There, it was suggested to first clarify the documentation of SKB_GSO_TCP_ECN and SKB_GSO_TCP_ACCECN. Signed-off-by: Chia-Yu Chang --- v3: - Update commit message and title for clarity v2: - Replace VIRTIO_NET_HDR_GSO_ECN with VIRTIO_NET_HDR_GSO_ECN_FLAGS --- drivers/net/virtio_net.c | 14 +++++++++++--- drivers/vdpa/pds/debugfs.c | 6 ++++++ include/linux/virtio_net.h | 18 +++++++++++------- include/uapi/linux/virtio_net.h | 5 +++++ 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index db88dcaefb20..103fb87c690e 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -75,6 +75,7 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_ACCECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_USO4, @@ -87,6 +88,7 @@ static const unsigned long guest_offloads[] = { #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + 
(1ULL << VIRTIO_NET_F_GUEST_ACCECN) | \ (1ULL << VIRTIO_NET_F_GUEST_UFO) | \ (1ULL << VIRTIO_NET_F_GUEST_USO4) | \ (1ULL << VIRTIO_NET_F_GUEST_USO6) | \ @@ -5976,6 +5978,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ACCECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) || @@ -6635,6 +6638,7 @@ static bool virtnet_check_guest_gso(const struct virtnet_info *vi) return virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || + virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ACCECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) && virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6)); @@ -6749,6 +6753,8 @@ static int virtnet_probe(struct virtio_device *vdev) dev->hw_features |= NETIF_F_TSO6; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) dev->hw_features |= NETIF_F_TSO_ECN; + if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ACCECN)) + dev->hw_features |= NETIF_F_GSO_ACCECN; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO)) dev->hw_features |= NETIF_F_GSO_UDP_L4; @@ -7169,9 +7175,11 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \ VIRTIO_NET_F_MAC, \ VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \ - VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ - VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \ - VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ + VIRTIO_NET_F_HOST_ECN, 
VIRTIO_NET_F_HOST_ACCECN, \ + VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \ + VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_ACCECN, \ + VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_USO, \ + VIRTIO_NET_F_GUEST_USO4, VIRTIO_NET_F_GUEST_USO6, \ VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \ VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \ VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c index c328e694f6e7..90bd95db0245 100644 --- a/drivers/vdpa/pds/debugfs.c +++ b/drivers/vdpa/pds/debugfs.c @@ -78,6 +78,9 @@ static void print_feature_bits_all(struct seq_file *seq, u64 features) case BIT_ULL(VIRTIO_NET_F_GUEST_ECN): seq_puts(seq, " VIRTIO_NET_F_GUEST_ECN"); break; + case BIT_ULL(VIRTIO_NET_F_GUEST_ACCECN): + seq_puts(seq, " VIRTIO_NET_F_GUEST_ACCECN"); + break; case BIT_ULL(VIRTIO_NET_F_GUEST_UFO): seq_puts(seq, " VIRTIO_NET_F_GUEST_UFO"); break; @@ -90,6 +93,9 @@ static void print_feature_bits_all(struct seq_file *seq, u64 features) case BIT_ULL(VIRTIO_NET_F_HOST_ECN): seq_puts(seq, " VIRTIO_NET_F_HOST_ECN"); break; + case BIT_ULL(VIRTIO_NET_F_HOST_ACCECN): + seq_puts(seq, " VIRTIO_NET_F_HOST_ACCECN"); + break; case BIT_ULL(VIRTIO_NET_F_HOST_UFO): seq_puts(seq, " VIRTIO_NET_F_HOST_UFO"); break; diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 75dabb763c65..0cf86b026828 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -11,7 +11,7 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) { - switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN_FLAGS) { case VIRTIO_NET_HDR_GSO_TCPV4: return protocol == cpu_to_be16(ETH_P_IP); case VIRTIO_NET_HDR_GSO_TCPV6: @@ -31,7 +31,7 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, if (skb->protocol) return 0; - switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN_FLAGS) 
{ case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: case VIRTIO_NET_HDR_GSO_UDP_L4: @@ -58,7 +58,7 @@ static inline int __virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int ip_proto; if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) { - switch (hdr_gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + switch (hdr_gso_type & ~VIRTIO_NET_HDR_GSO_ECN_FLAGS) { case VIRTIO_NET_HDR_GSO_TCPV4: gso_type = SKB_GSO_TCPV4; ip_proto = IPPROTO_TCP; @@ -84,7 +84,9 @@ static inline int __virtio_net_hdr_to_skb(struct sk_buff *skb, return -EINVAL; } - if (hdr_gso_type & VIRTIO_NET_HDR_GSO_ECN) + if (hdr_gso_type & VIRTIO_NET_HDR_GSO_ACCECN) + gso_type |= SKB_GSO_TCP_ACCECN; + else if (hdr_gso_type & VIRTIO_NET_HDR_GSO_ECN) gso_type |= SKB_GSO_TCP_ECN; if (hdr->gso_size == 0) @@ -159,7 +161,7 @@ static inline int __virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); - switch (gso_type & ~SKB_GSO_TCP_ECN) { + switch (gso_type & ~(SKB_GSO_TCP_ECN | SKB_GSO_TCP_ACCECN)) { case SKB_GSO_UDP: /* UFO may not include transport header in gso_size. */ nh_off -= thlen; @@ -231,7 +233,9 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4; else return -EINVAL; - if (sinfo->gso_type & SKB_GSO_TCP_ECN) + if (sinfo->gso_type & SKB_GSO_TCP_ACCECN) + hdr->gso_type |= VIRTIO_NET_HDR_GSO_ACCECN; + else if (sinfo->gso_type & SKB_GSO_TCP_ECN) hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; @@ -282,7 +286,7 @@ virtio_net_hdr_tnl_to_skb(struct sk_buff *skb, return -EINVAL; /* The UDP tunnel must carry a GSO packet, but no UFO. 
*/ - gso_inner_type = hdr->gso_type & ~(VIRTIO_NET_HDR_GSO_ECN | + gso_inner_type = hdr->gso_type & ~(VIRTIO_NET_HDR_GSO_ECN_FLAGS | VIRTIO_NET_HDR_GSO_UDP_TUNNEL); if (!gso_inner_type || gso_inner_type == VIRTIO_NET_HDR_GSO_UDP) return -EINVAL; diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 1db45b01532b..af5bfe45aa1f 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,6 +56,8 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_HOST_ACCECN 25 /* Host can handle GSO of AccECN */ +#define VIRTIO_NET_F_GUEST_ACCECN 26 /* Guest can handle GSO of AccECN */ #define VIRTIO_NET_F_DEVICE_STATS 50 /* Device can provide device-level statistics. */ #define VIRTIO_NET_F_VQ_NOTF_COAL 52 /* Device supports virtqueue notification coalescing */ #define VIRTIO_NET_F_NOTF_COAL 53 /* Device supports notifications coalescing */ @@ -165,6 +167,9 @@ struct virtio_net_hdr_v1 { #define VIRTIO_NET_HDR_GSO_UDP_TUNNEL (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 | \ VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6) #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ +#define VIRTIO_NET_HDR_GSO_ACCECN 0x10 /* TCP AccECN segmentation */ +#define VIRTIO_NET_HDR_GSO_ECN_FLAGS (VIRTIO_NET_HDR_GSO_ECN | \ + VIRTIO_NET_HDR_GSO_ACCECN) __u8 gso_type; __virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ __virtio16 gso_size; /* Bytes to append to hdr_len per frame */ -- 2.34.1