In IPv6 Destination options processing function check if net->ipv6.sysctl.max_dst_opts_cnt is zero up front. If it is zero then drop the packet since Destination Options processing is disabled. Similarly, in IPv6 hop-by-hop options processing function check if net->ipv6.sysctl.max_hbh_opts_cnt is zero up front. If it is zero then drop the packet since Hop-by-Hop Options processing is disabled. Signed-off-by: Tom Herbert --- net/ipv6/exthdrs.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 54088fa0c09d..b9d186784b96 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -301,9 +301,11 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) #endif struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(skb->dev); - int extlen; + int extlen, max_opts_cnt; - if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || + max_opts_cnt = READ_ONCE(net->ipv6.sysctl.max_dst_opts_cnt); + if (!max_opts_cnt || + !pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { __IP6_INC_STATS(dev_net(dst_dev(dst)), idev, @@ -322,8 +324,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) dstbuf = opt->dst1; #endif - if (ip6_parse_tlv(false, skb, - READ_ONCE(net->ipv6.sysctl.max_dst_opts_cnt))) { + if (ip6_parse_tlv(false, skb, max_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) @@ -1033,7 +1034,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(skb->dev); - int extlen; + int extlen, max_opts_cnt; /* * skb_network_header(skb) is equal to skb->data, and @@ -1041,7 +1042,9 @@ int ipv6_parse_hopopts(struct sk_buff *skb) * sizeof(struct ipv6hdr) by definition of * hop-by-hop options. 
*/ - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || + max_opts_cnt = READ_ONCE(net->ipv6.sysctl.max_hbh_opts_cnt); + if (!max_opts_cnt || + !pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || !pskb_may_pull(skb, (sizeof(struct ipv6hdr) + ((skb_transport_header(skb)[1] + 1) << 3)))) { fail_and_free: @@ -1054,8 +1057,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb) goto fail_and_free; opt->flags |= IP6SKB_HOPBYHOP; - if (ip6_parse_tlv(true, skb, - READ_ONCE(net->ipv6.sysctl.max_hbh_opts_cnt))) { + if (ip6_parse_tlv(true, skb, max_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); -- 2.43.0 Move IPV6_TLV_TNL_ENCAP_LIMIT to uapi/linux/in6.h to be with the rest of the TLV definitions. Label each of the TLV definitions as to whether they are a Hop-by-Hop option, Destination option, or both. Signed-off-by: Tom Herbert --- include/uapi/linux/in6.h | 21 ++++++++++++++------- include/uapi/linux/ip6_tunnel.h | 1 - 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 5a47339ef7d7..438283dc5fde 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -140,14 +140,21 @@ struct in6_flowlabel_req { /* * IPv6 TLV options. + * + * Hop-by-Hop and Destination options share the same number space. + * For each option below whether it is a Hop-by-Hop option or + * a Destination option is indicated by HBH or DestOpt. 
*/ -#define IPV6_TLV_PAD1 0 -#define IPV6_TLV_PADN 1 -#define IPV6_TLV_ROUTERALERT 5 -#define IPV6_TLV_CALIPSO 7 /* RFC 5570 */ -#define IPV6_TLV_IOAM 49 /* RFC 9486 */ -#define IPV6_TLV_JUMBO 194 -#define IPV6_TLV_HAO 201 /* home address option */ +#define IPV6_TLV_PAD1 0 /* HBH or DestOpt */ +#define IPV6_TLV_PADN 1 /* HBH or DestOpt */ +#define IPV6_TLV_TNL_ENCAP_LIMIT 4 /* RFC 2473, DestOpt */ +#define IPV6_TLV_ROUTERALERT 5 /* HBH */ +#define IPV6_TLV_CALIPSO 7 /* RFC 5570, HBH */ +#define IPV6_TLV_IOAM 49 /* RFC 9486, HBH or Destopt + * IOAM sent and rcvd as HBH + */ +#define IPV6_TLV_JUMBO 194 /* HBH */ +#define IPV6_TLV_HAO 201 /* home address option, DestOpt */ /* * IPV6 socket options diff --git a/include/uapi/linux/ip6_tunnel.h b/include/uapi/linux/ip6_tunnel.h index 85182a839d42..35af4d9c35fb 100644 --- a/include/uapi/linux/ip6_tunnel.h +++ b/include/uapi/linux/ip6_tunnel.h @@ -6,7 +6,6 @@ #include /* For IFNAMSIZ. */ #include /* For struct in6_addr. */ -#define IPV6_TLV_TNL_ENCAP_LIMIT 4 #define IPV6_DEFAULT_TNL_ENCAP_LIMIT 4 /* don't add encapsulation limit if one isn't present in inner packet */ -- 2.43.0 IPV6_TLV_TNL_ENCAP_LIMIT is a recognized Destination option that is processed in ip6_tunnel.c. Add a case for it in the switch in ip6_parse_tlv so that it is recognized as a known option. Also remove the unlikely around the check for max_count < 0 since the default limits for HBH and Destination options can be less than zero. 
Signed-off-by: Tom Herbert --- net/ipv6/exthdrs.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index b9d186784b96..6925cfad94d2 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -122,7 +122,7 @@ static bool ip6_parse_tlv(bool hopbyhop, int tlv_count = 0; int padlen = 0; - if (unlikely(max_count < 0)) { + if (max_count < 0) { disallow_unknowns = true; max_count = -max_count; } @@ -202,6 +202,16 @@ static bool ip6_parse_tlv(bool hopbyhop, if (!ipv6_dest_hao(skb, off)) return false; break; +#endif +#if IS_ENABLED(CONFIG_IPV6_TUNNEL) + case IPV6_TLV_TNL_ENCAP_LIMIT: + /* The tunnel encapsulation option. + * This is handled in ip6_tunnel.c so + * we don't need to do anything here + * except to accept it as a recognized + * option + */ + break; #endif default: if (!ip6_tlvopt_unknown(skb, off, -- 2.43.0 Set the default limits of non-padding Hop-by-Hop and Destination options to 2. This means that if a packet contains more than two non-padding options then it will be dropped. The previous limit was 8, but that was too liberal considering that the stack only supports two Destination Options and the most Hop-by-Hop options likely to ever be in the same packet are IOAM and JUMBO. The limit can be increased via sysctl for private use and experimentation. Signed-off-by: Tom Herbert --- include/net/ipv6.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c7f597da01cd..31d270c8c2e4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -86,9 +86,12 @@ struct ip_tunnel_info; * silently discarded. */ -/* Default limits for Hop-by-Hop and Destination options */ -#define IP6_DEFAULT_MAX_DST_OPTS_CNT 8 -#define IP6_DEFAULT_MAX_HBH_OPTS_CNT 8 +/* Default limits for Hop-by-Hop and Destination non-padding options. The + * default value for both is 2. 
This sets a limit at two non-padding options + * (see sysctl documentation) + */ +#define IP6_DEFAULT_MAX_DST_OPTS_CNT 2 +#define IP6_DEFAULT_MAX_HBH_OPTS_CNT 2 #define IP6_DEFAULT_MAX_DST_OPTS_LEN INT_MAX /* No limit */ #define IP6_DEFAULT_MAX_HBH_OPTS_LEN INT_MAX /* No limit */ -- 2.43.0 In the descriptions of max_dst_opts_number and max_hbh_opts_number sysctls add text about how a zero setting means that a packet with any Destination or Hop-by-Hop options is dropped. Report that the defaults for max_dst_opts_number and max_hbh_opts_number are 2, which means up to two options may be accepted. Signed-off-by: Tom Herbert --- Documentation/networking/ip-sysctl.rst | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index bc9a01606daf..4f568b0e39d2 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2475,19 +2475,25 @@ mld_qrv - INTEGER max_dst_opts_number - INTEGER Maximum number of non-padding TLVs allowed in a Destination - options extension header. If this value is less than zero - then unknown options are disallowed and the number of known - TLVs allowed is the absolute value of this number. + options extension header. If this value is zero then receive + Destination Options processing is disabled in which case packets + with the Destination Options extension header are dropped. If + this value is less than zero then unknown options are disallowed + and the number of known TLVs allowed is the absolute value of + this number. - Default: 8 + Default: 2 max_hbh_opts_number - INTEGER Maximum number of non-padding TLVs allowed in a Hop-by-Hop - options extension header. If this value is less than zero - then unknown options are disallowed and the number of known - TLVs allowed is the absolute value of this number. - - Default: 8 + options extension header. 
If this value is zero then receive + Hop-by-Hop Options processing is disabled in which case packets + with the Hop-by-Hop Options extension header are dropped. + If this value is less than zero then unknown options are disallowed + and the number of known TLVs allowed is the absolute value of this + number. + + Default: 2 max_dst_opts_length - INTEGER Maximum length allowed for a Destination options extension -- 2.43.0 RFC8200 highly recommends that different Extension Headers be sent in a prescribed order and all Extension Header types occur at most once in a packet with the exception of Destination Options that may occur twice. This patch enforces that the ordering be followed in received packets. The allowed order of Extension Headers is: IPv6 header Hop-by-Hop Options header Destination Options before the Routing Header Routing header Fragment header Authentication header Encapsulating Security Payload header Destination Options header Upper-Layer header Each Extension Header may be present only once in a packet. net.ipv6.enforce_ext_hdr_order is a sysctl to enable or disable enforcement of Extension Header order. If it is set to zero then Extension Header order and number of occurrences are not checked in receive processing (except for Hop-by-Hop Options that must be the first Extension Header and can only occur once in a packet). 
Signed-off-by: Tom Herbert --- include/net/netns/ipv6.h | 1 + include/net/protocol.h | 14 +++++++++++++ net/ipv6/af_inet6.c | 1 + net/ipv6/exthdrs.c | 2 ++ net/ipv6/ip6_input.c | 42 ++++++++++++++++++++++++++++++++++++++ net/ipv6/reassembly.c | 1 + net/ipv6/sysctl_net_ipv6.c | 7 +++++++ net/ipv6/xfrm6_protocol.c | 2 ++ 8 files changed, 70 insertions(+) diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 34bdb1308e8f..2db56718ea60 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -61,6 +61,7 @@ struct netns_sysctl_ipv6 { u8 fib_notify_on_flag_change; u8 icmpv6_error_anycast_as_unicast; u8 icmpv6_errors_extension_mask; + u8 enforce_ext_hdr_order; }; struct netns_ipv6 { diff --git a/include/net/protocol.h b/include/net/protocol.h index b2499f88f8f8..0f1676625570 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -50,6 +50,19 @@ struct net_protocol { }; #if IS_ENABLED(CONFIG_IPV6) + +/* Order of extension headers as prescribed in RFC8200. The ordering and + * number of extension headers in a packet can be enforced in IPv6 receive + * processing. 
+ */ +#define IPV6_EXT_HDR_ORDER_HOP BIT(0) +#define IPV6_EXT_HDR_ORDER_DEST_BEFORE_RH BIT(1) +#define IPV6_EXT_HDR_ORDER_ROUTING BIT(2) +#define IPV6_EXT_HDR_ORDER_FRAGMENT BIT(3) +#define IPV6_EXT_HDR_ORDER_AUTH BIT(4) +#define IPV6_EXT_HDR_ORDER_ESP BIT(5) +#define IPV6_EXT_HDR_ORDER_DEST BIT(6) + struct inet6_protocol { int (*handler)(struct sk_buff *skb); @@ -61,6 +74,7 @@ struct inet6_protocol { unsigned int flags; /* INET6_PROTO_xxx */ u32 secret; + u32 ext_hdr_order; }; #define INET6_PROTO_NOPOLICY 0x1 diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bd29840659f3..43097360ce64 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -980,6 +980,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN; net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN; net->ipv6.sysctl.fib_notify_on_flag_change = 0; + net->ipv6.sysctl.enforce_ext_hdr_order = 1; atomic_set(&net->ipv6.fib6_sernum, 1); net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 6925cfad94d2..4ab94c8cddb9 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -845,11 +845,13 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) static const struct inet6_protocol rthdr_protocol = { .handler = ipv6_rthdr_rcv, .flags = INET6_PROTO_NOPOLICY, + .ext_hdr_order = IPV6_EXT_HDR_ORDER_ROUTING, }; static const struct inet6_protocol destopt_protocol = { .handler = ipv6_destopt_rcv, .flags = INET6_PROTO_NOPOLICY, + .ext_hdr_order = IPV6_EXT_HDR_ORDER_DEST, }; static const struct inet6_protocol nodata_protocol = { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 168ec07e31cc..ab921c0a94af 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -355,6 +355,27 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, ip6_sublist_rcv(&sublist, curr_dev, curr_net); } +static u32 check_dst_opts_before_rh(const struct inet6_protocol 
*ipprot, + u32 ext_hdrs) +{ + /* Check if Destination Options before the Routing Header are + * present. + */ + if (ipprot->ext_hdr_order != IPV6_EXT_HDR_ORDER_ROUTING || + !(ext_hdrs & IPV6_EXT_HDR_ORDER_DEST)) + return ext_hdrs; + + /* We have Destination Options before the Routing Header. Set + * the mask of received extension headers to reflect that. We promote + * the bit from indicating just Destination Options present to + * Destination Options before the Routing Header being present + */ + ext_hdrs = (ext_hdrs & ~IPV6_EXT_HDR_ORDER_DEST) | + IPV6_EXT_HDR_ORDER_DEST_BEFORE_RH; + + return ext_hdrs; +} + INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); /* @@ -366,6 +387,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; + u32 ext_hdrs = 0; SKB_DR(reason); bool raw; @@ -427,6 +449,26 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, goto discard; } } + + if (ipprot->ext_hdr_order && + READ_ONCE(net->ipv6.sysctl.enforce_ext_hdr_order)) { + /* The protocol is an extension header and EH ordering + * is being enforced. 
Discard packet if we've already + * seen this EH or one that is lower in the order list + */ + if (ipprot->ext_hdr_order <= ext_hdrs) { + /* Check if there's Destination Options + * before the Routing Header + */ + ext_hdrs = check_dst_opts_before_rh(ipprot, + ext_hdrs); + if (ipprot->ext_hdr_order <= ext_hdrs) + goto discard; + } + + ext_hdrs |= ipprot->ext_hdr_order; + } + if (!(ipprot->flags & INET6_PROTO_NOPOLICY)) { if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { SKB_DR_SET(reason, XFRM_POLICY); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 25ec8001898d..91dba72c5a3c 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -414,6 +414,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) static const struct inet6_protocol frag_protocol = { .handler = ipv6_frag_rcv, .flags = INET6_PROTO_NOPOLICY, + .ext_hdr_order = IPV6_EXT_HDR_ORDER_FRAGMENT, }; #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d2cd33e2698d..543b6acdb11d 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -213,6 +213,13 @@ static struct ctl_table ipv6_table_template[] = { .proc_handler = proc_doulongvec_minmax, .extra2 = &ioam6_id_wide_max, }, + { + .procname = "enforce_ext_hdr_order", + .data = &init_net.ipv6.sysctl.enforce_ext_hdr_order, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + }, }; static struct ctl_table ipv6_rotable[] = { diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index ea2f805d3b01..5826edf67f64 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -197,12 +197,14 @@ static const struct inet6_protocol esp6_protocol = { .handler = xfrm6_esp_rcv, .err_handler = xfrm6_esp_err, .flags = INET6_PROTO_NOPOLICY, + .ext_hdr_order = IPV6_EXT_HDR_ORDER_ESP, }; static const struct inet6_protocol ah6_protocol = { .handler = xfrm6_ah_rcv, .err_handler = xfrm6_ah_err, .flags = INET6_PROTO_NOPOLICY, + .ext_hdr_order = 
IPV6_EXT_HDR_ORDER_AUTH }; static const struct inet6_protocol ipcomp6_protocol = { -- 2.43.0 Document the enforce_ext_hdr_order sysctl that controls whether Extension Header order is enforced on receive. Signed-off-by: Tom Herbert --- Documentation/networking/ip-sysctl.rst | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 4f568b0e39d2..1b12b955fa34 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2581,6 +2581,34 @@ ioam6_id_wide - LONG INTEGER Default: 0xFFFFFFFFFFFFFF +enforce_ext_hdr_order - BOOLEAN + Enforce recommended Extension Header ordering in RFC8200. + If the sysctl is set to 1 then the ordering is + enforced in received packets and each Extension Header + may be present at most once per packet. If the sysctl is + set to 0 then ordering is not enforced and Extension Headers + may be present in any order and have any number of + occurrences per packet (except for Hop-by-Hop Options). + + The Extension Header order is: + + IPv6 header + Hop-by-Hop Options header + Destination Options before the Routing header + Routing header + Fragment header + Authentication header + Encapsulating Security Payload header + Destination Options header + Upper-Layer header + + Possible values: + + - 0 (disabled) + - 1 (enabled) + + Default: 1 (enabled) + IPv6 Fragmentation: ip6frag_high_thresh - INTEGER -- 2.43.0